{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "There are 4 GPU(s) available.\nWe will use the GPU: TITAN X (Pascal)\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "# If there's a GPU available...\n",
    "if torch.cuda.is_available():    \n",
    "\n",
    "    # Tell PyTorch to use the GPU.    \n",
    "    device = torch.device(\"cuda:1\")\n",
    "    print('There are %d GPU(s) available.' % torch.cuda.device_count())\n",
    "\n",
    "    print('We will use the GPU:', torch.cuda.get_device_name(1))\n",
    "\n",
    "# If not...\n",
    "else:\n",
    "    print('No GPU available, using the CPU instead.')\n",
    "    device = torch.device(\"cpu\")\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install transformers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hyper parameters values\n",
    "\n",
    "# Set the maximum sequence length.\n",
    "MAX_LEN = 512\n",
    "\n",
    "# Set the batch size.  \n",
    "batch_size = 16 \n",
    "\n",
    "# Number of training epochs (authors recommend between 2 and 4)\n",
    "epochs = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Parsing the dataset.tsv file ....\n     DONE.\n---------------------\nchecking for duplicates\nnumber of duplicate values between test and train is: 184\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "print('Parsing the dataset.tsv file ....')\n",
    "# comments = pd.read_csv('./data/attack_annotated_comments.tsv', sep = '\\t', index_col = 0)\n",
    "train = pd.read_csv('./data/balanced_data/train.tsv', sep = '\\t')\n",
    "test = pd.read_csv('./data/balanced_data/test.tsv', sep = '\\t')\n",
    "\n",
    "print('     DONE.')\n",
    "print('---------------------')\n",
    "print('checking for duplicates')\n",
    "print('number of duplicate values between test and train is:', \n",
    "len(set(train['story_id'].unique()) & set(test['story_id'].unique()))\n",
    "      )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "label\n",
       "0    1036\n",
       "1    1062\n",
       "dtype: int64"
      ]
     },
     "metadata": {},
     "execution_count": 117
    }
   ],
   "source": [
    "test.groupby('label').size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "   story_id                                           raw_text  label  len\n",
       "0   6772845  kiev, january 21 (ria novosti) - former ukrain...      1  160\n",
       "1  35455458  the ukrainian government has called for local ...      0   56\n",
       "2  38817309  investigators probing the shooting down of mal...      0  343\n",
       "3   7765503  kharkov, april 21 (ria novosti) - russia and u...      1  227\n",
       "4  34816385  moscow, june 27 (ria novosti) - osce monitors ...      1  250"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>story_id</th>\n      <th>raw_text</th>\n      <th>label</th>\n      <th>len</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>6772845</td>\n      <td>kiev, january 21 (ria novosti) - former ukrain...</td>\n      <td>1</td>\n      <td>160</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>35455458</td>\n      <td>the ukrainian government has called for local ...</td>\n      <td>0</td>\n      <td>56</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>38817309</td>\n      <td>investigators probing the shooting down of mal...</td>\n      <td>0</td>\n      <td>343</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>7765503</td>\n      <td>kharkov, april 21 (ria novosti) - russia and u...</td>\n      <td>1</td>\n      <td>227</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>34816385</td>\n      <td>moscow, june 27 (ria novosti) - osce monitors ...</td>\n      <td>1</td>\n      <td>250</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 3
    }
   ],
   "source": [
    "def get_len(text):\n",
    "    return len(text.split())\n",
    "\n",
    "# Get the length of raw text\n",
    "train['len'] = train['raw_text'].apply(get_len)\n",
    "\n",
    "# Display the first five rows of the table\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_split(text1):\n",
    "    split_size = 200\n",
    "    overlap_size = 50\n",
    "    v1 = split_size - overlap_size\n",
    "    l_total = []\n",
    "    l_parcial = []\n",
    "    if len(text1.split())//v1 >0:\n",
    "        n = len(text1.split())//v1\n",
    "    else: \n",
    "        n = 1\n",
    "    for w in range(n):\n",
    "        if w == 0:\n",
    "            l_parcial = text1.split()[:split_size]\n",
    "            l_total.append(\" \".join(l_parcial))\n",
    "        else:\n",
    "            l_parcial = text1.split()[w*v1:w*v1 + split_size]\n",
    "            l_total.append(\" \".join(l_parcial))\n",
    "    return l_total"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "   story_id                                           raw_text  label  len  \\\n",
       "0   6772845  kiev, january 21 (ria novosti) - former ukrain...      1  160   \n",
       "1  35455458  the ukrainian government has called for local ...      0   56   \n",
       "2  38817309  investigators probing the shooting down of mal...      0  343   \n",
       "3   7765503  kharkov, april 21 (ria novosti) - russia and u...      1  227   \n",
       "4  34816385  moscow, june 27 (ria novosti) - osce monitors ...      1  250   \n",
       "\n",
       "                                          text_split  \n",
       "0  [kiev, january 21 (ria novosti) - former ukrai...  \n",
       "1  [the ukrainian government has called for local...  \n",
       "2  [investigators probing the shooting down of ma...  \n",
       "3  [kharkov, april 21 (ria novosti) - russia and ...  \n",
       "4  [moscow, june 27 (ria novosti) - osce monitors...  "
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>story_id</th>\n      <th>raw_text</th>\n      <th>label</th>\n      <th>len</th>\n      <th>text_split</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>6772845</td>\n      <td>kiev, january 21 (ria novosti) - former ukrain...</td>\n      <td>1</td>\n      <td>160</td>\n      <td>[kiev, january 21 (ria novosti) - former ukrai...</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>35455458</td>\n      <td>the ukrainian government has called for local ...</td>\n      <td>0</td>\n      <td>56</td>\n      <td>[the ukrainian government has called for local...</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>38817309</td>\n      <td>investigators probing the shooting down of mal...</td>\n      <td>0</td>\n      <td>343</td>\n      <td>[investigators probing the shooting down of ma...</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>7765503</td>\n      <td>kharkov, april 21 (ria novosti) - russia and u...</td>\n      <td>1</td>\n      <td>227</td>\n      <td>[kharkov, april 21 (ria novosti) - russia and ...</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>34816385</td>\n      <td>moscow, june 27 (ria novosti) - osce monitors ...</td>\n      <td>1</td>\n      <td>250</td>\n      <td>[moscow, june 27 (ria novosti) - osce monitors...</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 5
    }
   ],
   "source": [
    "train['text_split'] = train['raw_text'].apply(get_split)\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "text/plain": "HBox(children=(FloatProgress(value=0.0, max=8390.0), HTML(value='')))",
      "application/vnd.jupyter.widget-view+json": {
       "version_major": 2,
       "version_minor": 0,
       "model_id": "9124d7cfdf4443ffa2cf5dbe4c0fff0c"
      }
     },
     "metadata": {}
    },
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "\nlength of train_split_v is: 19057\n"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "   story_id  chunk_num                                           raw_text  \\\n",
       "0   6772845          0  kiev, january 21 (ria novosti) - former ukrain...   \n",
       "1  35455458          0  the ukrainian government has called for local ...   \n",
       "2  38817309          0  investigators probing the shooting down of mal...   \n",
       "3  38817309          1  investigators probing the shooting down of mal...   \n",
       "4   7765503          0  kharkov, april 21 (ria novosti) - russia and u...   \n",
       "\n",
       "                                          text_chunk  label  \n",
       "0  kiev, january 21 (ria novosti) - former ukrain...      1  \n",
       "1  the ukrainian government has called for local ...      0  \n",
       "2  investigators probing the shooting down of mal...      0  \n",
       "3  at ukraine's military. members of the joint in...      0  \n",
       "4  kharkov, april 21 (ria novosti) - russia and u...      1  "
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>story_id</th>\n      <th>chunk_num</th>\n      <th>raw_text</th>\n      <th>text_chunk</th>\n      <th>label</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>6772845</td>\n      <td>0</td>\n      <td>kiev, january 21 (ria novosti) - former ukrain...</td>\n      <td>kiev, january 21 (ria novosti) - former ukrain...</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>35455458</td>\n      <td>0</td>\n      <td>the ukrainian government has called for local ...</td>\n      <td>the ukrainian government has called for local ...</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>38817309</td>\n      <td>0</td>\n      <td>investigators probing the shooting down of mal...</td>\n      <td>investigators probing the shooting down of mal...</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>38817309</td>\n      <td>1</td>\n      <td>investigators probing the shooting down of mal...</td>\n      <td>at ukraine's military. members of the joint in...</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>7765503</td>\n      <td>0</td>\n      <td>kharkov, april 21 (ria novosti) - russia and u...</td>\n      <td>kharkov, april 21 (ria novosti) - russia and u...</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 6
    }
   ],
   "source": [
    "# create a row split version of dataset \n",
    "\n",
    "from tqdm import tqdm_notebook as tqdm\n",
    "tmp = []\n",
    "\n",
    "for i in tqdm(range(len(train))):\n",
    "    for j in range(len(train.iloc[i].text_split)):\n",
    "#         chunk_num = str(train.iloc[i]['story_id']) + '_' + str(j)\n",
    "        chunk_num = j\n",
    "        tmp.append(\n",
    "        {'story_id': train.iloc[i]['story_id'],\n",
    "            'chunk_num': chunk_num,\n",
    "            'raw_text': train.iloc[i]['raw_text'],\n",
    "            'text_chunk': train.iloc[i]['text_split'][j],\n",
    "            'label': train.iloc[i].label}\n",
    "        )\n",
    "\n",
    "train_split_v = pd.DataFrame(tmp) \n",
    "# train.head()\n",
    "print('length of train_split_v is:', len(train_split_v))\n",
    "train_split_v.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "length of train_split_v is: 19057\n"
     ]
    }
   ],
   "source": [
    "print('length of train_split_v is:', len(train_split_v))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import textwrap\n",
    "# import random \n",
    "\n",
    "# # wrap text to 80 characters.\n",
    "\n",
    "# wrapper = textwrap.TextWrapper(width = 100)\n",
    "\n",
    "# # filter to just \"attack\" comments.\n",
    "\n",
    "# examples = train.query('label')['raw_text']\n",
    "\n",
    "# # Randomly choose some examples\n",
    "\n",
    "# for i in range(10):\n",
    "#     j = random.choice(examples.index)\n",
    "    \n",
    "#     print('')\n",
    "#     print(wrapper.fill(examples[j]))\n",
    "#     print('')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "loading BERT tokenizer....\n",
      "    DONE.\n"
     ]
    }
   ],
   "source": [
    "from transformers import BertTokenizer\n",
    "\n",
    "# load the BERT tokenizer, \n",
    "print('loading BERT tokenizer....')\n",
    "tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case = True)\n",
    "print('    DONE.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Retrieve the text from the first comment.\n",
    "# text = train.iloc[10].raw_text\n",
    "\n",
    "# tokens = tokenizer.tokenize(text)\n",
    "\n",
    "# # Print the original text\n",
    "# print('comment 0 (not an attack) contains {:,} wordpiece tokens.'.format(len(tokens)))\n",
    "# print('\\nOriginal comment text:\\n')\n",
    "# print(wrapper.fill(text))\n",
    "\n",
    "\n",
    "# # print out the list of tokens\n",
    "# print('**** First 512 tokens:*****\\n')\n",
    "# print(wrapper.fill(str(' '.join(tokens[0:512]))))\n",
    "\n",
    "# print('')\n",
    "\n",
    "# print('\\n****** Remaining {:,} tokens: *****\\n'.format(len(tokens) - 512))\n",
    "# print(wrapper.fill(str(' '.join(tokens[512:]))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # First truncate the text to remove the last 79 tokens (which begin with the words \"are made in\"). \n",
    "# last_char = text.find('are made in')\n",
    "\n",
    "# # Truncate the text to only what fits in the 512 tokens.\n",
    "# text = text[0:last_char]\n",
    "\n",
    "# # Estimate the number of words in the comment by spliting it on whitespace.\n",
    "# # first remove all double spaces.\n",
    "# text = text.replace('  ', ' ')\n",
    "# num_words = len(text.split(' '))\n",
    "# print('Comment contains ~{:,} words.'.format(num_words))\n",
    "\n",
    "# # Estimate the number of sentences by counting up the periods.\n",
    "# num_sens = text.count('. ')\n",
    "# print('Comment contains ~{:,} sentences.'.format(num_sens))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Tokenizing texts.....\n"
     ]
    },
    {
     "output_type": "display_data",
     "data": {
      "text/plain": "HBox(children=(FloatProgress(value=0.0, max=19057.0), HTML(value='')))",
      "application/vnd.jupyter.widget-view+json": {
       "version_major": 2,
       "version_minor": 0,
       "model_id": "662189f102b344a2947ac0d253f242ac"
      }
     },
     "metadata": {}
    },
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      " Read 0 texts.\n",
      " Read 200 texts.\n",
      " Read 400 texts.\n",
      " Read 600 texts.\n",
      " Read 800 texts.\n",
      " Read 1,000 texts.\n",
      " Read 1,200 texts.\n",
      " Read 1,400 texts.\n",
      " Read 1,600 texts.\n",
      " Read 1,800 texts.\n",
      " Read 2,000 texts.\n",
      " Read 2,200 texts.\n",
      " Read 2,400 texts.\n",
      " Read 2,600 texts.\n",
      " Read 2,800 texts.\n",
      " Read 3,000 texts.\n",
      " Read 3,200 texts.\n",
      " Read 3,400 texts.\n",
      " Read 3,600 texts.\n",
      " Read 3,800 texts.\n",
      " Read 4,000 texts.\n",
      " Read 4,200 texts.\n",
      " Read 4,400 texts.\n",
      " Read 4,600 texts.\n",
      " Read 4,800 texts.\n",
      " Read 5,000 texts.\n",
      " Read 5,200 texts.\n",
      " Read 5,400 texts.\n",
      " Read 5,600 texts.\n",
      " Read 5,800 texts.\n",
      " Read 6,000 texts.\n",
      " Read 6,200 texts.\n",
      " Read 6,400 texts.\n",
      " Read 6,600 texts.\n",
      " Read 6,800 texts.\n",
      " Read 7,000 texts.\n",
      " Read 7,200 texts.\n",
      " Read 7,400 texts.\n",
      " Read 7,600 texts.\n",
      " Read 7,800 texts.\n",
      " Read 8,000 texts.\n",
      " Read 8,200 texts.\n",
      " Read 8,400 texts.\n",
      " Read 8,600 texts.\n",
      " Read 8,800 texts.\n",
      " Read 9,000 texts.\n",
      " Read 9,200 texts.\n",
      " Read 9,400 texts.\n",
      " Read 9,600 texts.\n",
      " Read 9,800 texts.\n",
      " Read 10,000 texts.\n",
      " Read 10,200 texts.\n",
      " Read 10,400 texts.\n",
      " Read 10,600 texts.\n",
      " Read 10,800 texts.\n",
      " Read 11,000 texts.\n",
      " Read 11,200 texts.\n",
      " Read 11,400 texts.\n",
      " Read 11,600 texts.\n",
      " Read 11,800 texts.\n",
      " Read 12,000 texts.\n",
      " Read 12,200 texts.\n",
      " Read 12,400 texts.\n",
      " Read 12,600 texts.\n",
      " Read 12,800 texts.\n",
      " Read 13,000 texts.\n",
      " Read 13,200 texts.\n",
      " Read 13,400 texts.\n",
      " Read 13,600 texts.\n",
      " Read 13,800 texts.\n",
      " Read 14,000 texts.\n",
      " Read 14,200 texts.\n",
      " Read 14,400 texts.\n",
      " Read 14,600 texts.\n",
      " Read 14,800 texts.\n",
      " Read 15,000 texts.\n",
      " Read 15,200 texts.\n",
      " Read 15,400 texts.\n",
      " Read 15,600 texts.\n",
      " Read 15,800 texts.\n",
      " Read 16,000 texts.\n",
      " Read 16,200 texts.\n",
      " Read 16,400 texts.\n",
      " Read 16,600 texts.\n",
      " Read 16,800 texts.\n",
      " Read 17,000 texts.\n",
      " Read 17,200 texts.\n",
      " Read 17,400 texts.\n",
      " Read 17,600 texts.\n",
      " Read 17,800 texts.\n",
      " Read 18,000 texts.\n",
      " Read 18,200 texts.\n",
      " Read 18,400 texts.\n",
      " Read 18,600 texts.\n",
      " Read 18,800 texts.\n",
      " Read 19,000 texts.\n",
      "\n",
      "DONE\n",
      "    19,057 comments\n",
      "   Min length: 18 tokens\n",
      "   Max length: 395 tokens\n",
      "Median length: 251.0 tokens\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "from tqdm import tqdm_notebook as tqdm\n",
    "\n",
    "# Tokenize all the sentences and map the tokens to their word IDs.\n",
    "\n",
    "input_ids = []\n",
    "\n",
    "# Record the length of each sequence (after truncating to 512).\n",
    "\n",
    "lengths = []\n",
    "\n",
    "print('Tokenizing texts.....')\n",
    "\n",
    "# For every sentence....\n",
    "for text in tqdm(train_split_v.text_chunk):\n",
    "    # Report progress.\n",
    "    if ((len(input_ids) % 200) == 0):\n",
    "        print(' Read {:,} texts.'.format(len(input_ids)))\n",
    "        \n",
    "    # endoce will: \n",
    "    # (1) Tokenize the sentence\n",
    "    # (2) Prepend the \"[CLS]\" token to the start.\n",
    "    # (3) Append the \"[SEP]\" token to the end.\n",
    "    # (4) Map tokens to their IDs. \n",
    "    encoded_text = tokenizer.encode(\n",
    "        text,                          #sentence to encode.\n",
    "        add_special_tokens = True,   # Add '[CLS]' and '[SEP]'\n",
    "#             max_length = 512,            # Truncate all the sentences.\n",
    "#             return_tensors = 'pt'        # Return pytorch tensors.\n",
    "    )\n",
    "    # Add the encoded sentence to the list\n",
    "    input_ids.append(encoded_text)\n",
    "    \n",
    "    # Record the truncated length.\n",
    "    lengths.append(len(encoded_text))\n",
    "    \n",
    "print('DONE')\n",
    "print('{:>10,} comments'.format(len(input_ids)))\n",
    "\n",
    "print('   Min length: {:,} tokens'.format(min(lengths)))\n",
    "print('   Max length: {:,} tokens'.format(max(lengths)))\n",
    "print('Median length: {:,} tokens'.format(np.median(lengths)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "  7,380 label as 1 \n 11,677 labels as 0\n"
     ]
    }
   ],
   "source": [
    "chunk_nums = train_split_v.chunk_num.to_numpy().astype(int)\n",
    "story_ids = train_split_v.story_id.to_numpy().astype(int)\n",
    "\n",
    "# ALso retreive the labels as a list\n",
    "\n",
    "# Get the labels from dataframe, and convert from booleans to ints.\n",
    "\n",
    "labels = train_split_v.label.to_numpy().astype(int)\n",
    "print('{:>7,} label as 1 '.format(np.sum(labels)))\n",
    "print('{:>7,} labels as 0'.format(len(labels)-np.sum(labels)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import matplotlib.pyplot as plt\n",
    "# import seaborn as sns\n",
    "# import numpy as np\n",
    "\n",
    "# sns.set(style='darkgrid')\n",
    "\n",
    "# # Increase the plot size and font size.\n",
    "# sns.set(font_scale=1.5)\n",
    "# plt.rcParams['figure.figsize'] = (10,5)\n",
    "\n",
    "# # Truncate any comment lengths greater tahn 512.\n",
    "# lengths = [min(l, 512) for l in lengths]\n",
    "\n",
    "# # Plot the distribution of comment lengths.\n",
    "# sns.distplot(lengths, kde=False, rug=False)\n",
    "# plt.title('Comment lengths')\n",
    "# plt.xlabel('Comment length')\n",
    "# plt.ylabel ('# of comments')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # count the number of sentences that had to be truncated to 512 tokens. \n",
    "# num_truncated = lengths.count(512)\n",
    "\n",
    "# # compare this to the total number of training sentences.\n",
    "# num_sentences = len(lengths)\n",
    "# prcnt = float(num_truncated)/ float(num_sentences)\n",
    "# print('{:,} of {:,} sentences ({:.1%}) in the training set are longer than 512 tokens.'.format(num_truncated, num_sentences, prcnt))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "\n",
      "Padding/truncating all sentences to 512 values...\n",
      "\n",
      "Padding token: \"[PAD]\", ID: 0\n",
      "\n",
      "Done.\n"
     ]
    }
   ],
   "source": [
    "# We'll borrow the `pad_sequences` utility function to do this.\n",
    "from keras.preprocessing.sequence import pad_sequences\n",
    "\n",
    "# Set the maximum sequence length.\n",
    "MAX_LEN = 512\n",
    "\n",
    "print('\\nPadding/truncating all sentences to %d values...' % MAX_LEN)\n",
    "\n",
    "print('\\nPadding token: \"{:}\", ID: {:}'.format(tokenizer.pad_token, tokenizer.pad_token_id))\n",
    "\n",
    "# Pad our input tokens with value 0.\n",
    "# \"post\" indicates that we want to pad and truncate at the end of the sequence,\n",
    "# as opposed to the beginning.\n",
    "input_ids = pad_sequences(input_ids, maxlen=MAX_LEN, dtype=\"long\", \n",
    "                          value=0, truncating=\"post\", padding=\"post\")\n",
    "\n",
    "print('\\nDone.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create attention masks\n",
    "attention_masks = []\n",
    "\n",
    "# For each sentence...\n",
    "for sent in input_ids:\n",
    "    \n",
    "    # Create the attention mask.\n",
    "    #   - If a token ID is 0, then it's padding, set the mask to 0.\n",
    "    #   - If a token ID is > 0, then it's a real token, set the mask to 1.\n",
    "    att_mask = [int(token_id > 0) for token_id in sent]\n",
    "    \n",
    "    # Store the attention mask for this sentence.\n",
    "    attention_masks.append(att_mask)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use train_test_split to split our data into train and validation sets for\n",
    "# training\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Use 90% for training and 10% for validation.\n",
    "train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(input_ids, labels, \n",
    "                                                            random_state=2018, test_size=0.1)\n",
    "# Do the same for the masks.\n",
    "train_masks, validation_masks, _, _ = train_test_split(attention_masks, labels,\n",
    "                                             random_state=2018, test_size=0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert all inputs and labels into torch tensors, the required datatype \n",
    "# for our model.\n",
    "train_inputs = torch.tensor(train_inputs)\n",
    "validation_inputs = torch.tensor(validation_inputs)\n",
    "\n",
    "train_labels = torch.tensor(train_labels)\n",
    "validation_labels = torch.tensor(validation_labels)\n",
    "\n",
    "train_masks = torch.tensor(train_masks)\n",
    "validation_masks = torch.tensor(validation_masks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n",
    "\n",
    "# The DataLoader needs to know our batch size for training, so we specify it here.\n",
    "# For fine-tuning BERT on a specific task, the authors recommend a batch size of 16 or 32.\n",
    "\n",
    "batch_size = 32\n",
    "\n",
    "# Create the DataLoader for our training set.\n",
    "train_data = TensorDataset(train_inputs, train_masks, train_labels)\n",
    "train_sampler = RandomSampler(train_data)\n",
    "train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)\n",
    "\n",
    "# Create the DataLoader for our validation set.\n",
    "validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)\n",
    "validation_sampler = SequentialSampler(validation_data)\n",
    "validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']\n",
      "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n",
      "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "from transformers import BertForSequenceClassification, AdamW, BertConfig\n",
    "\n",
    "# Load BertForSequenceClassification, the pretrained BERT model with a single \n",
    "# linear classification layer on top. \n",
    "model = BertForSequenceClassification.from_pretrained(\n",
    "    \"bert-base-uncased\", # Use the 12-layer BERT model, with an uncased vocab.\n",
    "    num_labels = 2, # The number of output labels--2 for binary classification.\n",
    "                    # You can increase this for multi-class tasks.   \n",
    "    output_attentions = False, # Whether the model returns attentions weights.\n",
    "    output_hidden_states = False, # Whether the model returns all hidden-states.\n",
    ")\n",
    "model_path = './model_save/pytorch_model_2.bin'\n",
    "# state_dict = torch.load(model_path)\n",
    "# model.load_state_dict(state_dict)\n",
    "# Tell pytorch to run this model on the GPU.\n",
    "if torch.cuda.is_available():\n",
    "        model.cuda()\n",
    "        model = torch.nn.DataParallel(model)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "module.bert.embeddings.word_embeddings.weight torch.Size([30522, 768])\nmodule.bert.embeddings.position_embeddings.weight torch.Size([512, 768])\nmodule.bert.embeddings.token_type_embeddings.weight torch.Size([2, 768])\nmodule.bert.embeddings.LayerNorm.weight torch.Size([768])\nmodule.bert.embeddings.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.0.attention.self.query.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.0.attention.self.query.bias torch.Size([768])\nmodule.bert.encoder.layer.0.attention.self.key.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.0.attention.self.key.bias torch.Size([768])\nmodule.bert.encoder.layer.0.attention.self.value.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.0.attention.self.value.bias torch.Size([768])\nmodule.bert.encoder.layer.0.attention.output.dense.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.0.attention.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.0.attention.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.0.attention.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.0.intermediate.dense.weight torch.Size([3072, 768])\nmodule.bert.encoder.layer.0.intermediate.dense.bias torch.Size([3072])\nmodule.bert.encoder.layer.0.output.dense.weight torch.Size([768, 3072])\nmodule.bert.encoder.layer.0.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.0.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.0.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.1.attention.self.query.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.1.attention.self.query.bias torch.Size([768])\nmodule.bert.encoder.layer.1.attention.self.key.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.1.attention.self.key.bias torch.Size([768])\nmodule.bert.encoder.layer.1.attention.self.value.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.1.attention.self.value.bias torch.Size([768])\nmodule.bert.encoder.layer.1.attention.output.dense.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.1.attention.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.1.attention.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.1.attention.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.1.intermediate.dense.weight torch.Size([3072, 768])\nmodule.bert.encoder.layer.1.intermediate.dense.bias torch.Size([3072])\nmodule.bert.encoder.layer.1.output.dense.weight torch.Size([768, 3072])\nmodule.bert.encoder.layer.1.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.1.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.1.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.2.attention.self.query.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.2.attention.self.query.bias torch.Size([768])\nmodule.bert.encoder.layer.2.attention.self.key.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.2.attention.self.key.bias torch.Size([768])\nmodule.bert.encoder.layer.2.attention.self.value.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.2.attention.self.value.bias torch.Size([768])\nmodule.bert.encoder.layer.2.attention.output.dense.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.2.attention.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.2.attention.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.2.attention.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.2.intermediate.dense.weight torch.Size([3072, 768])\nmodule.bert.encoder.layer.2.intermediate.dense.bias torch.Size([3072])\nmodule.bert.encoder.layer.2.output.dense.weight torch.Size([768, 3072])\nmodule.bert.encoder.layer.2.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.2.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.2.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.3.attention.self.query.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.3.attention.self.query.bias torch.Size([768])\nmodule.bert.encoder.layer.3.attention.self.key.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.3.attention.self.key.bias torch.Size([768])\nmodule.bert.encoder.layer.3.attention.self.value.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.3.attention.self.value.bias torch.Size([768])\nmodule.bert.encoder.layer.3.attention.output.dense.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.3.attention.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.3.attention.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.3.attention.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.3.intermediate.dense.weight torch.Size([3072, 768])\nmodule.bert.encoder.layer.3.intermediate.dense.bias torch.Size([3072])\nmodule.bert.encoder.layer.3.output.dense.weight torch.Size([768, 3072])\nmodule.bert.encoder.layer.3.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.3.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.3.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.4.attention.self.query.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.4.attention.self.query.bias torch.Size([768])\nmodule.bert.encoder.layer.4.attention.self.key.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.4.attention.self.key.bias torch.Size([768])\nmodule.bert.encoder.layer.4.attention.self.value.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.4.attention.self.value.bias torch.Size([768])\nmodule.bert.encoder.layer.4.attention.output.dense.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.4.attention.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.4.attention.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.4.attention.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.4.intermediate.dense.weight torch.Size([3072, 768])\nmodule.bert.encoder.layer.4.intermediate.dense.bias torch.Size([3072])\nmodule.bert.encoder.layer.4.output.dense.weight torch.Size([768, 3072])\nmodule.bert.encoder.layer.4.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.4.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.4.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.5.attention.self.query.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.5.attention.self.query.bias torch.Size([768])\nmodule.bert.encoder.layer.5.attention.self.key.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.5.attention.self.key.bias torch.Size([768])\nmodule.bert.encoder.layer.5.attention.self.value.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.5.attention.self.value.bias torch.Size([768])\nmodule.bert.encoder.layer.5.attention.output.dense.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.5.attention.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.5.attention.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.5.attention.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.5.intermediate.dense.weight torch.Size([3072, 768])\nmodule.bert.encoder.layer.5.intermediate.dense.bias torch.Size([3072])\nmodule.bert.encoder.layer.5.output.dense.weight torch.Size([768, 3072])\nmodule.bert.encoder.layer.5.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.5.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.5.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.6.attention.self.query.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.6.attention.self.query.bias torch.Size([768])\nmodule.bert.encoder.layer.6.attention.self.key.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.6.attention.self.key.bias torch.Size([768])\nmodule.bert.encoder.layer.6.attention.self.value.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.6.attention.self.value.bias torch.Size([768])\nmodule.bert.encoder.layer.6.attention.output.dense.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.6.attention.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.6.attention.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.6.attention.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.6.intermediate.dense.weight torch.Size([3072, 768])\nmodule.bert.encoder.layer.6.intermediate.dense.bias torch.Size([3072])\nmodule.bert.encoder.layer.6.output.dense.weight torch.Size([768, 3072])\nmodule.bert.encoder.layer.6.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.6.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.6.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.7.attention.self.query.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.7.attention.self.query.bias torch.Size([768])\nmodule.bert.encoder.layer.7.attention.self.key.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.7.attention.self.key.bias torch.Size([768])\nmodule.bert.encoder.layer.7.attention.self.value.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.7.attention.self.value.bias torch.Size([768])\nmodule.bert.encoder.layer.7.attention.output.dense.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.7.attention.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.7.attention.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.7.attention.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.7.intermediate.dense.weight torch.Size([3072, 768])\nmodule.bert.encoder.layer.7.intermediate.dense.bias torch.Size([3072])\nmodule.bert.encoder.layer.7.output.dense.weight torch.Size([768, 3072])\nmodule.bert.encoder.layer.7.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.7.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.7.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.8.attention.self.query.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.8.attention.self.query.bias torch.Size([768])\nmodule.bert.encoder.layer.8.attention.self.key.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.8.attention.self.key.bias torch.Size([768])\nmodule.bert.encoder.layer.8.attention.self.value.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.8.attention.self.value.bias torch.Size([768])\nmodule.bert.encoder.layer.8.attention.output.dense.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.8.attention.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.8.attention.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.8.attention.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.8.intermediate.dense.weight torch.Size([3072, 768])\nmodule.bert.encoder.layer.8.intermediate.dense.bias torch.Size([3072])\nmodule.bert.encoder.layer.8.output.dense.weight torch.Size([768, 3072])\nmodule.bert.encoder.layer.8.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.8.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.8.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.9.attention.self.query.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.9.attention.self.query.bias torch.Size([768])\nmodule.bert.encoder.layer.9.attention.self.key.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.9.attention.self.key.bias torch.Size([768])\nmodule.bert.encoder.layer.9.attention.self.value.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.9.attention.self.value.bias torch.Size([768])\nmodule.bert.encoder.layer.9.attention.output.dense.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.9.attention.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.9.attention.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.9.attention.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.9.intermediate.dense.weight torch.Size([3072, 768])\nmodule.bert.encoder.layer.9.intermediate.dense.bias torch.Size([3072])\nmodule.bert.encoder.layer.9.output.dense.weight torch.Size([768, 3072])\nmodule.bert.encoder.layer.9.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.9.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.9.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.10.attention.self.query.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.10.attention.self.query.bias torch.Size([768])\nmodule.bert.encoder.layer.10.attention.self.key.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.10.attention.self.key.bias torch.Size([768])\nmodule.bert.encoder.layer.10.attention.self.value.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.10.attention.self.value.bias torch.Size([768])\nmodule.bert.encoder.layer.10.attention.output.dense.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.10.attention.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.10.attention.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.10.attention.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.10.intermediate.dense.weight torch.Size([3072, 768])\nmodule.bert.encoder.layer.10.intermediate.dense.bias torch.Size([3072])\nmodule.bert.encoder.layer.10.output.dense.weight torch.Size([768, 3072])\nmodule.bert.encoder.layer.10.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.10.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.10.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.11.attention.self.query.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.11.attention.self.query.bias torch.Size([768])\nmodule.bert.encoder.layer.11.attention.self.key.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.11.attention.self.key.bias torch.Size([768])\nmodule.bert.encoder.layer.11.attention.self.value.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.11.attention.self.value.bias torch.Size([768])\nmodule.bert.encoder.layer.11.attention.output.dense.weight torch.Size([768, 768])\nmodule.bert.encoder.layer.11.attention.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.11.attention.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.11.attention.output.LayerNorm.bias torch.Size([768])\nmodule.bert.encoder.layer.11.intermediate.dense.weight torch.Size([3072, 768])\nmodule.bert.encoder.layer.11.intermediate.dense.bias torch.Size([3072])\nmodule.bert.encoder.layer.11.output.dense.weight torch.Size([768, 3072])\nmodule.bert.encoder.layer.11.output.dense.bias torch.Size([768])\nmodule.bert.encoder.layer.11.output.LayerNorm.weight torch.Size([768])\nmodule.bert.encoder.layer.11.output.LayerNorm.bias torch.Size([768])\nmodule.bert.pooler.dense.weight torch.Size([768, 768])\nmodule.bert.pooler.dense.bias torch.Size([768])\nmodule.classifier.weight torch.Size([2, 768])\nmodule.classifier.bias torch.Size([2])\n----\n"
     ]
    }
   ],
   "source": [
    "# Note: AdamW is a class from the huggingface library (as opposed to pytorch) \n",
    "# I believe the 'W' stands for 'Weight Decay fix\"\n",
    "optimizable_params = []\n",
    "for name, p in model.named_parameters():\n",
    "    if p.requires_grad:\n",
    "#         print(p.name, p.shape)\n",
    "        print(name, p.shape)\n",
    "        if name == 'classifier.bias' or name == 'classifier.weight':\n",
    "            optimizable_params.append(p)\n",
    "# for p in model.parameters():\n",
    "#     if p.requires_grad:\n",
    "# #         print(p.name, p.shape)\n",
    "# #         print(name, p.shape)\n",
    "#         if p.name == 'classifer.bias' or p.name == 'classifier.weight':\n",
    "#             optimizable_params.append(p)\n",
    "print(\"----\")            \n",
    "# print(optimizable_params)\n",
    "# for p in optimizable_params:\n",
    "#     print(p.shape)\n",
    "optimizer = AdamW(model.parameters(),#optimizable_params,\n",
    "                  lr = 2e-5, # args.learning_rate - default is 5e-5, our notebook had 2e-5\n",
    "                  eps = 1e-8 # args.adam_epsilon  - default is 1e-8.\n",
    "                )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import get_linear_schedule_with_warmup\n",
    "\n",
    "# Number of training epochs (authors recommend between 2 and 4)\n",
    "epochs = 3\n",
    "\n",
    "# Total number of training steps is number of batches * number of epochs.\n",
    "total_steps = len(train_dataloader) * epochs\n",
    "\n",
    "# Create the learning rate scheduler.\n",
    "scheduler = get_linear_schedule_with_warmup(optimizer, \n",
    "                                            num_warmup_steps = 0, # Default value in run_glue.py\n",
    "                                            num_training_steps = total_steps)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Function to calculate the accuracy of our predictions vs labels\n",
    "def flat_accuracy(preds, labels):\n",
    "    pred_flat = np.argmax(preds, axis=1).flatten()\n",
    "    labels_flat = labels.flatten()\n",
    "    return np.sum(pred_flat == labels_flat) / len(labels_flat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Our performance metric for the test set.\n",
    "from sklearn.metrics import roc_auc_score\n",
    "\n",
    "def calc_roc_auc (true_labels, pred_labels):\n",
    "\n",
    "    # Calculate the ROC AUC\n",
    "    auc = roc_auc_score(true_labels, pred_labels)\n",
    "\n",
    "#     print('Test ROC AUC: %.3f' %auc)\n",
    "    return auc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "import datetime\n",
    "\n",
    "def format_time(elapsed):\n",
    "    '''\n",
    "    Takes a time in seconds and returns a string hh:mm:ss\n",
    "    '''\n",
    "    # Round to the nearest second.\n",
    "    elapsed_rounded = int(round((elapsed)))\n",
    "    \n",
    "    # Format as hh:mm:ss\n",
    "    return str(datetime.timedelta(seconds=elapsed_rounded))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def model_save():\n",
    "    import os\n",
    "\n",
    "    # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()\n",
    "\n",
    "    output_dir = './model_save/'\n",
    "\n",
    "    # Create output directory if needed\n",
    "    if not os.path.exists(output_dir):\n",
    "        os.makedirs(output_dir)\n",
    "\n",
    "    print(\"Saving model to %s\" % output_dir)\n",
    "\n",
    "    # Save a trained model, configuration and tokenizer using `save_pretrained()`.\n",
    "    # They can then be reloaded using `from_pretrained()`\n",
    "    model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training\n",
    "    model_to_save.save_pretrained(output_dir)\n",
    "    tokenizer.save_pretrained(output_dir)\n",
    "\n",
    "    # Good practice: save your training arguments together with the trained model\n",
    "    # torch.save(args, os.path.join(output_dir, 'training_args.bin'))\n",
    "    return"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "======== Epoch 1 / 2 ========\n",
      "Training...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/maryam/anaconda3/lib/python3.7/site-packages/torch/nn/parallel/_functions.py:61: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
      "  warnings.warn('Was asked to gather along dimension 0, but all '\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  Batch   100  of  1,044.    Elapsed: 0:00:40.\n",
      "  Batch   200  of  1,044.    Elapsed: 0:01:22.\n",
      "  Batch   300  of  1,044.    Elapsed: 0:02:02.\n",
      "  Batch   400  of  1,044.    Elapsed: 0:02:43.\n",
      "  Batch   500  of  1,044.    Elapsed: 0:03:24.\n",
      "  Batch   600  of  1,044.    Elapsed: 0:04:04.\n",
      "  Batch   700  of  1,044.    Elapsed: 0:04:45.\n",
      "  Batch   800  of  1,044.    Elapsed: 0:05:26.\n",
      "  Batch   900  of  1,044.    Elapsed: 0:06:07.\n",
      "  Batch 1,000  of  1,044.    Elapsed: 0:06:47.\n",
      "\n",
      "  Average training loss: 0.00\n",
      "  Training epcoh took: 0:07:05\n",
      "\n",
      "Running Validation...\n",
      "Saving model to ./model_save/\n",
      "  Accuracy: 0.98\n",
      "  ROC_AUC score: 1.00\n",
      "  Validation took: 0:00:27\n",
      "\n",
      "======== Epoch 2 / 2 ========\n",
      "Training...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/maryam/anaconda3/lib/python3.7/site-packages/torch/nn/parallel/_functions.py:61: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
      "  warnings.warn('Was asked to gather along dimension 0, but all '\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  Batch   100  of  1,044.    Elapsed: 0:00:41.\n",
      "  Batch   200  of  1,044.    Elapsed: 0:01:22.\n",
      "  Batch   300  of  1,044.    Elapsed: 0:02:02.\n",
      "  Batch   400  of  1,044.    Elapsed: 0:02:43.\n",
      "  Batch   500  of  1,044.    Elapsed: 0:03:24.\n",
      "  Batch   600  of  1,044.    Elapsed: 0:04:05.\n",
      "  Batch   700  of  1,044.    Elapsed: 0:04:46.\n",
      "  Batch   800  of  1,044.    Elapsed: 0:05:26.\n",
      "  Batch   900  of  1,044.    Elapsed: 0:06:07.\n",
      "  Batch 1,000  of  1,044.    Elapsed: 0:06:48.\n",
      "\n",
      "  Average training loss: 0.02\n",
      "  Training epcoh took: 0:07:06\n",
      "\n",
      "Running Validation...\n",
      "Saving model to ./model_save/\n",
      "  Accuracy: 0.98\n",
      "  ROC_AUC score: 1.00\n",
      "  Validation took: 0:00:28\n",
      "\n",
      "Training complete!\n"
     ]
    }
   ],
   "source": [
    "import random\n",
    "import os\n",
    "from torch.utils.tensorboard import SummaryWriter\n",
    "\n",
    "\n",
    "# This training code is based on the `run_glue.py` script here:\n",
    "# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128\n",
    "\n",
    "# Set the seed value all over the place to make this reproducible.\n",
    "seed_val = 42\n",
    "\n",
    "random.seed(seed_val)\n",
    "np.random.seed(seed_val)\n",
    "torch.manual_seed(seed_val)\n",
    "torch.cuda.manual_seed_all(seed_val)\n",
    "\n",
    "# Store the average loss after each epoch so we can plot them.\n",
    "loss_values = []\n",
    "log_dir  = './tensorboard_log/'\n",
    "if not os.path.exists(log_dir):\n",
    "    os.makedirs(log_dir)\n",
    "writer = SummaryWriter(log_dir=log_dir)\n",
    "\n",
    "\n",
    "# For each epoch...\n",
    "for epoch_i in range(0, epochs):\n",
    "    \n",
    "    # ========================================\n",
    "    #               Training\n",
    "    # ========================================\n",
    "    \n",
    "    # Perform one full pass over the training set.\n",
    "\n",
    "    print(\"\")\n",
    "    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n",
    "    print('Training...')\n",
    "\n",
    "    # Measure how long the training epoch takes.\n",
    "    t0 = time.time()\n",
    "\n",
    "    # Reset the total loss for this epoch.\n",
    "    total_loss = []\n",
    "    max_accuracy = 0.65\n",
    "    # Put the model into training mode. Don't be mislead--the call to \n",
    "    # `train` just changes the *mode*, it doesn't *perform* the training.\n",
    "    # `dropout` and `batchnorm` layers behave differently during training\n",
    "    # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)\n",
    "    model.train()\n",
    "\n",
    "    # For each batch of training data...\n",
    "    for step, batch in enumerate(train_dataloader):\n",
    "\n",
    "        # Progress update every 100 batches.\n",
    "        if step % 100 == 0 and not step == 0:\n",
    "            # Calculate elapsed time in minutes.\n",
    "            elapsed = format_time(time.time() - t0)\n",
    "            \n",
    "            # Report progress.\n",
    "            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))\n",
    "\n",
    "        # Unpack this training batch from our dataloader. \n",
    "        #\n",
    "        # As we unpack the batch, we'll also copy each tensor to the GPU using the \n",
    "        # `to` method.\n",
    "        #\n",
    "        # `batch` contains three pytorch tensors:\n",
    "        #   [0]: input ids \n",
    "        #   [1]: attention masks\n",
    "        #   [2]: labels \n",
    "        b_input_ids = batch[0].cuda()\n",
    "        b_input_mask = batch[1].cuda()\n",
    "        b_labels = batch[2].cuda()\n",
    "\n",
    "        # Always clear any previously calculated gradients before performing a\n",
    "        # backward pass. PyTorch doesn't do this automatically because \n",
    "        # accumulating the gradients is \"convenient while training RNNs\". \n",
    "        # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)\n",
    "        model.zero_grad()        \n",
    "\n",
    "        # Perform a forward pass (evaluate the model on this training batch).\n",
    "        # This will return the loss (rather than the model output) because we\n",
    "        # have provided the `labels`.\n",
    "        # The documentation for this `model` function is here: \n",
    "        # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n",
    "\n",
    "        outputs = model(b_input_ids, \n",
    "                    token_type_ids=None, \n",
    "                    attention_mask=b_input_mask, \n",
    "                    labels=b_labels)\n",
    "        \n",
    "        # The call to `model` always returns a tuple, so we need to pull the \n",
    "        # loss value out of the tuple.\n",
    "        loss = outputs[0].mean()\n",
    "\n",
    "        # Accumulate the training loss over all of the batches so that we can\n",
    "        # calculate the average loss at the end. `loss` is a Tensor containing a\n",
    "        # single value; the `.item()` function just returns the Python value \n",
    "        # from the tensor.\n",
    "\n",
    "        total_loss.append(loss.data.cpu().numpy())\n",
    "        # print(loss)\n",
    "        # if step % 10 == 0:\n",
    "        #     writer.add_scalar('loss', loss, step)\n",
    "\n",
    "        # Perform a backward pass to calculate the gradients.\n",
    "        loss.backward()\n",
    "\n",
    "        # Clip the norm of the gradients to 1.0.\n",
    "        # This is to help prevent the \"exploding gradients\" problem.\n",
    "        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
    "\n",
    "        # Update parameters and take a step using the computed gradient.\n",
    "        # The optimizer dictates the \"update rule\"--how the parameters are\n",
    "        # modified based on their gradients, the learning rate, etc.\n",
    "        optimizer.step()\n",
    "\n",
    "        # Update the learning rate.\n",
    "        scheduler.step()\n",
    "\n",
    "    # Calculate the average loss over the training data.\n",
    "    avg_train_loss = sum(total_loss) / len(train_dataloader)            \n",
    "    \n",
    "    # Store the loss value for plotting the learning curve.\n",
    "    loss_values.append(avg_train_loss)\n",
    "\n",
    "    print(\"\")\n",
    "    print(\"  Average training loss: {0:.2f}\".format(avg_train_loss))\n",
    "    print(\"  Training epcoh took: {:}\".format(format_time(time.time() - t0)))\n",
    "        \n",
    "    # ========================================\n",
    "    #               Validation\n",
    "    # ========================================\n",
    "    # After the completion of each training epoch, measure our performance on\n",
    "    # our validation set.\n",
    "\n",
    "    print(\"\")\n",
    "    print(\"Running Validation...\")\n",
    "\n",
    "    t0 = time.time()\n",
    "\n",
    "    # Put the model in evaluation mode--the dropout layers behave differently\n",
    "    # during evaluation. \n",
    "    model.eval()\n",
    "\n",
    "    # Tracking variables \n",
    "    eval_loss, eval_accuracy, eval_roc_auc = 0, 0, 0\n",
    "    nb_eval_steps, nb_eval_examples = 0, 0\n",
    "\n",
    "    # Evaluate data for one epoch\n",
    "    for batch in validation_dataloader:\n",
    "        \n",
    "        # Add batch to GPU\n",
    "        batch = tuple(t for t in batch)\n",
    "        \n",
    "        # Unpack the inputs from our dataloader\n",
    "        b_input_ids, b_input_mask, b_labels = batch\n",
    "        \n",
    "        # Telling the model not to compute or store gradients, saving memory and\n",
    "        # speeding up validation\n",
    "        with torch.no_grad():        \n",
    "\n",
    "            # Forward pass, calculate logit predictions.\n",
    "            # This will return the logits rather than the loss because we have\n",
    "            # not provided labels.\n",
    "            # token_type_ids is the same as the \"segment ids\", which \n",
    "            # differentiates sentence 1 and 2 in 2-sentence tasks.\n",
    "            # The documentation for this `model` function is here: \n",
    "            # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n",
    "            outputs = model(b_input_ids, \n",
    "                            token_type_ids=None, \n",
    "                            attention_mask=b_input_mask)\n",
    "        \n",
    "        # Get the \"logits\" output by the model. The \"logits\" are the output\n",
    "        # values prior to applying an activation function like the softmax.\n",
    "        logits = outputs[0]\n",
    "\n",
    "        # Move logits and labels to CPU\n",
    "        logits = logits.detach().cpu().numpy()\n",
    "        label_ids = b_labels.to('cpu').numpy()\n",
    "        \n",
    "        # Calculate the ROC_AUC score for this batch of evaluation sentences.\n",
    "        tmp_eval_roc_auc = calc_roc_auc(label_ids, logits[:,1]-logits[:,0])\n",
    "        \n",
    "        # Accumulate the total ROC_AUC of evaluation set.\n",
    "        eval_roc_auc += tmp_eval_roc_auc \n",
    "        \n",
    "        # Calculate the accuracy for this batch of test sentences.\n",
    "        tmp_eval_accuracy = flat_accuracy(logits, label_ids)\n",
    "        \n",
    "        # Accumulate the total accuracy.\n",
    "        eval_accuracy += tmp_eval_accuracy\n",
    "        \n",
    "        # Save the model if it has a higher validation accuracy\n",
    "        if tmp_eval_accuracy > max_accuracy: \n",
    "            max_accuracy = tmp_eval_accuracy\n",
    "            model_save()\n",
    "       \n",
    "        # Track the number of batches\n",
    "        nb_eval_steps += 1\n",
    "        \n",
    "\n",
    "    # Report the final accuracy for this validation run.\n",
    "    print(\"  Accuracy: {0:.2f}\".format(eval_accuracy/nb_eval_steps))\n",
    "    print(\"  ROC_AUC score: {0:.2f}\".format(eval_roc_auc/nb_eval_steps))\n",
    "    print(\"  Validation took: {:}\".format(format_time(time.time() - t0)))\n",
    "\n",
    "print(\"\")\n",
    "print(\"Training complete!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# print(loss.data.cpu().numpy())\n",
    "# !pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAvoAAAGXCAYAAADCnfTMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOzdeXxV1b3//1dOZkjIRAhDSIIeEmZRwJCgRiQgMqi0TkWwWBSxRNve26u91fZ+7bX3Eu2P9KsIWsujhattQakpBAyEQVASEgxzQMJ0MjCGTCfzdPb3Dy/nZ0qABBJOkvN+/pe111rns896AG921t7bxTAMAxERERER6VZMji5ARERERETan4K+iIiIiEg3pKAvIiIiItINKeiLiIiIiHRDCvoiIiIiIt2Qgr6IiIiISDekoC8iItf0u9/9jqioKIqKim5ofF1dHVFRUfz6179u58ra5q9//StRUVHs37/foXWIiNwqbo4uQEREri8qKqrVfbdu3UpoaGgHViMiIl2Bgr6ISBfw1ltvNfs5Ozub1atX8+STTzJmzJhmxwIDA9v1s3/605/y0ksv4enpeUPjPT09OXjwIK6uru1al4iIXJuCvohIF/DII480+7mpqYnVq1czevToK45djWEY1NTU0KNHjzZ9tpubG25uN/fPxY3+J0FERG6c9uiLiHRDO3fuJCoqipSUFFauXMnUqVMZOXIkH330EQB79+7llVdeYcqUKdxxxx3cddddPP3002zfvv2KuVrao3+5raCggMTERO69915GjhzJrFmz2LVrV7PxLe3R/27bnj17+MEPfsAdd9zB+PHj+fWvf01NTc0VdaSnp/P4448zcuRI7rnnHhITEzly5AhRUVH84Q9/uOHv6tKlS/z617/mvvvuY8SIEUycOJE333yT8vLyZv2qq6tJSkriwQcfZNSoUYwbN46ZM2eSlJTUrN+WLVv4wQ9+QHR0NKNGjWLixIm8/PLLFBQU3HCNIiI3Qlf0RUS6sQ8//JCKigq+//3vExQUxMCBAwFITU0lPz+fadOm0b9/f0pKSvjss89YuHAh7777LlOmTGnV/P/6r/+Kp6cnzz33HHV1dfz5z3/mxRdfJC0tjZCQkOuOP3ToEJs2beKxxx7j4YcfJiMjg9WrV+Ph4cHrr79u75eRkcHzzz9PYGAgL7zwAj4+PmzYsIHMzMwb+2L+V1lZGU8++SRnz57l8ccfZ8iQIRw6dIiPPvqIzMxM1qxZg7e3NwC/+tWv2LBhA7NmzWL06NE0NDRgsVjYvXu3fb6vvvqKhIQEhg0bxsKFC/Hx8eHChQvs2rWLwsJC+/cvInIrKOiLiHRjFy9e5PPPP8ff379Z+09/+tMrtvDMnTuXhx9+mOXLl7c66IeEhPDOO+/g4uICYP/NwCeffEJCQsJ1xx87doxPP/2UYcOGAfCDH/yAH/7wh6xevZpXXnkFDw8PAP77v/8bd3d31qxZQ79+/QCYPXs2Tz31VKvqvJr333+fwsJCfvvb3/LYY4/Z2wcPHkxiYqL9Py6GYbBt2zbi4+P57//+76vOt2XLFgBWrlyJr6+vvb0134WISHvT1h0RkW7s+9///hUhH2gW8mtqaigtLaWuro67776bo0ePUl9f36r5f/jDH9pDPsCYMWNwd3fHYrG0avy4cePsIf+y8ePHU19fz7lz5wA4c+YMx44d48EHH7SHfAAPDw+eeeaZVn3O1Vz+zcP3vve9Zu1z5szB19eXtLQ0AFxcXOjZsyfHjh3j5MmTV53P19cXwzDYtGkTTU1NN1WbiMjN0hV9EZFuLCIiosX2ixcvkpSUxPbt2yktLb3ieEVFBUFBQded/5+3ori4uODn50dZWVmr6mtpK8vl/5iUlZURHh5OYWEhAIMGDbqib0ttrWUYBmfPnmX8+PGYTM2ve3l4eBAWFmb/bIDXXnuNX/7yl0ybNo3w8HCio6N54IEHuP/+++3/2fnhD3/IF198wWuvvcbixYsZO3Ys9957L9OmTSMgIOCGaxURuREK+iIi3djl/eXf1dTUxLx58ygsLOSZZ55h+PDh+Pr6YjKZ+Nvf/samTZuw2Wytmv+fA/JlhmHc1PjvztHaudqqrfM+9NBDREdHs3PnTrKysvjqq69Ys2YNMTEx/PGPf8TNzY3evXvz2WefsWfPHtLT09mzZw9vvvkm77zzDitWrGDEiBEdci4iIi1R0BcRcTKHDx/m5MmT/Mu//AsvvPBCs2OXn8rTmVx++dfp06evONZSW2uZTCYGDBjAqVOnsNlszf7TUV9fT35+PmFhYc3GBAYG8uijj/Loo49iGAb/9V//xapVq9i5cycPPPAA8O3jSGNiYoiJiQG+/b4fe+wxPvjgA959990brldEpK20R19ExMlcDrT/fEU7JyeHHTt2OKKkawoNDSUyMpJNmzbZ9+3Dt2F81apVNzV3fHw858+fJzk5uVn7X/7yFyoqKpg8eTIADQ0NVFZWNuvj4uLC0KFDAeyP4iwpKbniM8xmMx4eHq3eziQi0l50RV9ExMlERUURERHB8uXLsVqtREREcPLkSdasWUNUVBQ5OTmOLvEKv/jFL3j++ed54okneOqpp+jZsycbNmxodiPwjVi4cCGbN2/m9ddf58CBA0RFRXH48GH+/ve/ExkZybx584Bv7xeIj48nPj6eqKgoAgMDKSgo4K9//SsBAQHExcUB8Morr2C1WomJiWHAgAFUV1eTkpJCXV0djz766M1+DSIibaKgLyLiZDw8PPjwww956623WLt2LXV1dURGRrJkyRKys7M7ZdCfMGECH3zwAb///e95//338fPzY8aMGcTHx/P000/j5eV1Q/P6+/uzevVq3n33XbZu3cratWsJCgpizpw5vPTSS/Z7HHx9fZkzZw4ZGRl8+eWX1NTUEBwczJQpU3jhhRcIDAwE4Hvf+x7/+Mc/+Pvf/05paSm+vr4MHjyYZcuWMWnSpHb7PkREWsPF6Ki7nERERDrYunXr+Ld/+zfee+894uPjHV2OiEinoj36IiLS6dlstiue7V9fX8/KlSvx8PBgzJgxDqpMRKTz0tYdERHp9CorK5k2bRozZ84kIiKCkpISNmzYwPHjx0lISNAz6kVEWqCgLyIinZ6XlxcTJkxg8+bNXLp0CYDbbruN3/zmNzz55JMOrk5EpHPSHn0RERERkW5Ie/RFRERERLohBX0RERERkW5Ie/Q7UGlpFTbbrd0ZFRTkQ3Fx5fU7SpemdXYOWmfnoHXu/rTGzsFR62wyuRAQ0LPFYwr6HchmM2550L/8udL9aZ2dg9bZOWiduz+tsXPobOusrTsiIiIiIt2Qgr6IiIiISDekoC8iIiIi0g0p6IuIiIiIdEMK+iIiIiIi3ZCCvoiIiIhIN6SgLyIiIiLSDSnoi4iIiIh0Qwr6IiIiIiLdkN6MKyIiIiJygzJyzvP3HScpsdYR2MuT78XdTszwvo4uC1DQFxERERG5IRk551n5+TfUN9oAKLbWsfLzbwA6RdjX1h0RERERkRvw9x0n7SH/svpGG3/fcdJBFTWnoC8iIiIi0kZFZTUUW+taPHa19ltNW3dERERERFrpfEk1G9ItZORcuGqfoF6et7Ciq1PQFxERERG5jsKiSjZk5JF19ALuriYmjQklOMCLT7c3377j4Wbie3G3O7DS/5+CvoiIiIjIVeSdryAl3UJ2bhGeHq5MjQ7jwXFh9OrpAUBPL3c9dUdEREREpKs4ebaclF0WDpwsxtvTjZmxEUweNxAfb/dm/WKG9yVmeF+Cg30pKqpwULUtU9AXEREREflfuQVlrN91mhxLKT293Jh1321MuiuUHl5dLzZ3vYpFRERERNqRYRgcyStl/S4LuQVl9OrhzuMTb2finQPw8ui6cbnrVi4iIiIichMMw+DgyWJS0i2cPGslwNeTH8QPJu6O/ni4uzq6vJumoC8iIiIiTsVmGOzLvURKuoW8CxUE9fLimQejmDCyH+5u3ec1Uwr6IiIiIuIUbDaDPd9cJCXDwpmiKvoEePPstCHEDO+Lm2v3CfiXKeiLiIiISLfWZLOxO+cCKRl5XCippn/vniyYOYxxQ/vgaup+Af8yBX0RERER6ZYam2zsOnSODRl5XCqvZWAfH3786AjuigrG5OLi6PI6nIK+iIiIiHQrDY1N7Dxwjo278yitqGNQP19mx0dyhzkIFycI+Jcp6IuIiIhIt1BX38QX+8+QmplPeVU9g0P9eHbaEIZHBDpVwL9MQV9EREREurSauka27S1kU1YBlTUNDA0P4IWHhxMV5u+UAf8yBX0RERER6ZKqahtI21PAlq8Lqa5rZORtQcyMjcAc6ufo0joFBX0RERER6VKs1fWk7Slga3YhtfVN3Dm4NzNiIxjUr5ejS+tUHBr0q6qqSEpKIjU1FavVitlsZtGiRUyaNOm6Y/Pz81m8eDGZmZnYbDbGjh3Lq6++itlsbtYvKSmJnJwccnJyKCkpISEhgZdeeumK+QzD4JNPPuGvf/0rFosFT09PIiMjWbhwIbGxse12ziIiIiJyY8oq60jNzOeL/WdoaLAxdkgfZsRGMLCPj6NL65Qc+uDQhIQE1q9fz09+8hM++OADzGYzCQkJ7Nix45rjiouLmT17NmfOnCExMZElS5ZQXl7OnDlzOH/+fLO+q1atorKykvj4+GvO+d577/GrX/2KUaNGsXTpUt58800Mw+BHP/oR6enpN32uIiIiInJjSqy1fLw5l1eWZ7Dl60LGRPbhzeejefHREQr51+CwK/o7duwgPT2dpUuXMnnyZADGjx9PQUEBixcvJi4u7qpjV6xYgdVqZe3atYSEhAAwevRoJk2axPLly3njjTfsfbOzszGZTFitVtasWXPVOf/+978zZsyYZmNjY2OJjo5m3bp1uqovIiIicotdLKthY0Yeuw6dA2DCyL5MGx9On4AeDq6sa3DYFf20tDR8fX2bbdNxcXFh1qxZnDp1ihMnTlx17JYtW4iNjbWHfICAgAAmTpxIWlpas76mVr7tzM3NDV9f32ZtXl5euLu74+Hh0ao5REREROTmnSuuYkXKEX75wW7SD5/nvtH9WfxCDPMeGqqQ3wYOC/rHjx/HbDZfEcSjoqIAyM3NbXFcbW0t+fn5REZGXnEsKiqK4uJiiouL21zPM888w5dffsknn3yC1WrlwoUL9u07s2fPbvN8IiIiItI2hUWVvP+Pw7z+YSZ7vrlI/NhQEhfGMHdKFEF+Xo4ur8tx2NadsrIyIiIirmj38/OzH29JeXk5hmHY+32Xv7+/fWxQUFCb6pkzZw6enp785je/4fXXXwcgKCiIFStWMGTIkDbNJSIiIiKtl3e+gvXpFvbmFuHp4crU8WE8OC6MXj21q+JmOPSpO9d6gcH1Xm7Q3i8/SE5O5re//S3z5s1jwoQJ1NTU8PHHH7Nw4UJWrFjByJEj2zxnUJBjbg4JDva9fifp8rTOzkHr7By0zt2f1rhl3+SVsDotl6+PXqCnlxtPTY7i4ftuw7dH1wz4nW2dHRb0/f39W7xqX15eDtDiFfvL7S4uLi2Ovdx2+cp+a5WXl/Mf//EfPPnkk/zrv/6rvf2ee+5hxowZvP3226xatapNcwIUF1disxltHnczgoN9KSqquKWfKbee1tk5aJ2dg9a5+9MaX+lYfinr0y0csZTi4+3O9+67jQfuCqWHlxu1VXXUVtU5usQ2c9Q6m0wuV7247LCgbzab2bx5Mzabrdk+/ct781vagw/f3iA7cODAFvfw5+bmEhgY2OZtO6dPn6a2tpYRI0Y0a3d3dycqKors7Ow2zSciIiIizRmGwRFLKet3nSa3sJxePT14YqKZ++/sj5eH3uHaERx2M+7kyZOxWq1s27atWXtycjKDBg264sVX3xUfH096ejpFRUX2trKyMrZv325/VGdb9OnTB4CDBw82a6+vr+fIkSPNnu4jIiIiIq1nGAb7T1zit/+Tzf+3ej9F5bXMjh/MWwtjmBodppDfgRz2zcbFxREdHc1rr71GWVkZoaGhJCcnk52dzbJly+z95s6dS1ZWFseOHbO3zZ8/n3Xr1rFgwQIWLVqEm5sby5cvx83NjYULFzb7nKysLEpKSqitrQXgxIkTpKam2mvw9vamf//+xMfH85e//AUPDw/7Hv2PPvqIgoIC3nrrrVvwjYiIiIh0HzbDYF9uEevTLeRfqKS3nxfPTI1iwoh+uLs59J2tTsPFMIxbu4n8OyorK1myZAmbNm3CarViNptZtGhRs7fYthT0ASwWC4mJiWRmZmIYBmPGjOHVV19l8ODBzfpdHt+SrVu3EhoaCnz72M5Vq1axfv16CgsL8fLywmw289xzz13z5V3Xoj360lG0zs5B6+wctM7dn7Otsc1mkPXNBTak53HmUhUhAd5Mj4lg/PAQ3Fy7b8DvjHv0HRr0uzsFfekoWmfnoHV2Dlrn7s9Z1rixyUbmkQukZORxoaSa/r17MiM2nLuHhGAyte/TEjujzhj0tSlKRERERG5YQ6ONXYfPsTEjj0vltYT18eHHj47grqhgTO38OHRpGwV9EREREWmz+oYmdh44y+eZ+ZRW1DGoXy9mT47kjtuD2v19R3JjFPRFREREpNVq6xv5Yt9ZNmXlU15Vz+BQP56dNoThEYEK+J2Mgr6IiIiIXFdNXSNbswvZvKeAypoGhoYHsPCR4USFBTi6NLkKBX0RERERuarKmga2fF3Alq8Lqa5rZNTtQcyIjcA8wM/Rpcl1KOiLiIiIyBWs1fVszipg295CauubuCsymBmx4UT07eXo0qSVFPRFRERExK6sso7UzHy+2H+GhgYb44b2YUZMBKF9Wn6Eo3ReCvoiIiIiQnF5LZ9n5rHzwDlsNoPxw0OYHhNOv6Ceji5NbpCCvoiIiIgTu1hWw8YMC7sOnQdgwsi+TBsfTp+AHo4tTG6agr6IiIiIEzpXXMWGjDx251zAZHIhbnR/HooOJ8jPy9GlSTtR0BcRERFxIoUXK0nJsLDn6EXc3U3Ejw3lwbvDCPD1dHRp0s4U9EVEREScgOW8lfW7LOw7fglPD1ceGh/OlLsH0quHh6NLkw6ioC8iIiLSjZ04U05KuoWDJ4vp4enGwxMiiB87EB9vd0eXJh1MQV9ERESkGzqWX8q6XRaO5pXi4+3O9+NuY+KdofTwUvxzFlppERERkW7CMAxyLCWk7LKQW1hOr54ePDHRzMQ7B+Dp4ero8uQWU9AXERER6eIMw+DAyWJS0i2cOmslwNeTpydHcu+ofni4K+A7KwV9ERERkS7KZhjsPVZESrqF/IuV9Pbz4pmpUUwY0Q93N5OjyxMHU9AXERER6WJsNoOsoxdIycjj7KUqQgJ7MH/6UKKHheDmqoAv31LQFxEREekiGpts7M65wIYMCxdKaxjQuycLHh7G3UNCMJlcHF2edDIK+iIiIiKdXEOjjV2HzrFxdx6XymsJC/Fh0awR3BkZjMlFAV9apqAvIiIi0knVNzSx88BZPs/Mp7Sijtv69+LpyZGMuj0IFwV8uQ4FfREREZFOpra+kS/2nSU1Kx9rVT2RoX78aNpQhkUEKOBLqynoi4iIiHQS1bWNbNtbyOY9BVTWNDAsIoCZjwwnKizA0aVJF6SgLyIiIuJglTUNbPm6gC1fF1Jd18io24OYGRvB7QP8HF2adGEK+iIiIiIOYq2qZ9OefLbtPUNdfRN3RQYzMzaC8L6+ji5NugGHB/2qqiqSkpJITU3FarViNptZtGgRkyZNuu7Y/Px8Fi9eTGZmJjabjbFjx/Lqq69iNpub9UtKSiInJ4ecnBxKSkpISEjgpZdeanHO+vp6Vq5cSXJyMvn5+fTo0YPIyEh+85vfMGjQoHY5ZxEREXFupRV1bMrK54t9Z2hotDFuaB9mxEYQGuzj6NKkG3F40E9ISODIkSP8/Oc/JzQ0lM8++4yEhATef/994uLirjquuLiY2bNnExQURGJiIq6urixfvpw5c+aQnJxM37597X1XrVpFVFQU8fHxrFmz5qpzNjY28uKLL3Ls2DEWLFjAkCFDqKioYN++fdTV1bXreYuIiIjzKS6vZWNmHl8eOIfNZhAzPIRpMeH0C+rp6NKkG3Jo0N+xYwfp6eksXbqUyZMnAzB+/HgKCgpYvHjxNYP+ihUrsFqtrF27lpCQEABGjx7NpEmTWL58OW+88Ya9b3Z2NiaTCavVes2gv2rVKvbu3cu6desYOHCgvb01v10QERERuZqLpdVsyMgj/fB5ACaM7Me0mHD6+Hs7uDLpzhz6juS0tDR8fX2bBWkXFxdmzZrFqVOnOHHixFXHbtmyhdjYWHvIBwgICGDixImkpaU162syte40P/roI6ZOndos5IuIiIjcqHPFVSz5Sza//EMmGTkXuH/0ABIXxjDvoSEK+dLhHHpF//jx45jN5iuCeFRUFAC5ublX7LcHqK2tJT8/n6lTp15xLCoqipSUFIqLiwkKCmp1LWfPnuXMmTOEh4fzH//xH2zcuJGamhoiIyN5+eWXuf/++9t2ciIiIuK0Ci9Wsj7dwtffXMTDw5XJ40J58O4w/H08HV2aOBGHBv2ysjIiIiKuaPfz87Mfb0l5eTmGYdj7fZe/v799bFuC/sWLFwH48MMPiYyMtO/7/9Of/sTChQv58MMPuffee1s9n4iIiDgfy3kr63dZ2Hf8El4erkyLCecHU4dSX1Pv6NLECTn8Ztxrvd3tem9+a883w9lsNgDc3d358MMP8fH59q738ePHM2XKFJYtW9bmoB8U5Jg754OD9UguZ6B1dg5aZ+egde76vrGU8Le0Y2R/c5Ge3u7MnhLFjHtvw7eHx7cddCXfKXS2P8sODfr+/v4tXrUvLy8HaPGK/eV2FxeXFsdebrt8Zb8ttQDceeed9pAP4OnpSXR0NJs2bWrTfADFxZXYbEabx92M4GBfiooqbulnyq2ndXYOWmfnoHXuugzD4Fh+GevTLRzNK8XH253vx93GA3eF4u3pRm1VHbVVdVpjJ+GodTaZXK56cdmhQd9sNrN582ZsNluzffq5ubkAREZGtjjOy8uLgQMH2vt9V25uLoGBgW3atgMQFhaGt3fLN8UYhtHqG3pFRESkezMMg5zTJaxPt3C8sBy/nh48+YCZ+0cPwNPD1dHlidg5NL1OnjwZq9XKtm3bmrUnJyczaNCgFm/EvSw+Pp709HSKiorsbWVlZWzfvt3+qM62cHNzY+LEiezdu5fKykp7e21tLZmZmYwaNarNc4qIiEj3YRgG+49f4s1V2SxZc4Biay1PT44kcWEMD94dppAvnY5Dr+jHxcURHR3Na6+9RllZGaGhoSQnJ5Odnc2yZcvs/ebOnUtWVhbHjh2zt82fP59169axYMECFi1ahJubG8uXL8fNzY2FCxc2+5ysrCxKSkqora0F4MSJE6SmptpruHwl/+WXX2bnzp3Mnz+f5557DpPJxJ///GdKSkpISkrq6K9DREREOiGbYbD3WBHr0y0UXKykt58XP5waxYSR/XBz1W/8pfNyMQzj1m4i/yeVlZUsWbKETZs2YbVaMZvNLFq0iPj4eHufloI+gMViITExkczMTAzDYMyYMbz66qsMHjy4Wb/L41uydetWQkND7T9/8803vP322+zduxebzcaoUaP4yU9+wtixY9t8btqjLx1F6+wctM7OQevceTXZbOw5epGUjDzOXqoiJLAHM2LCiR4W0qaArzV2Dp1xj77Dg353pqAvHUXr7By0zs5B69z5NDbZyMg5z4aMPC6W1jCgd09mxEYwbkgfTKa2P/FPa+wcOmPQd/jjNUVEREQ6g4ZGG7sOnWPj7jwuldcSFuLDolkjuTOyN6Z2fKS3yK2ioC8iIiJOra6hiZ0HzpKamU9pRR239e/F05MjGXV7ULu+s0fkVlPQFxEREadUW9/I9n1n2JSZj7W6gciB/vxo+lCGhQco4Eu3oKAvIiIiTqW6tpGtewtJ21NAZU0DwyMCmBEbQVRYgKNLE2lXCvoiIiLiFCprGkjbU8CW7EJq6hq54/YgZsRGcPsAP0eXJtIhFPRFRESkW7NW1bMpK59t+85QV9/EmMhgZsRGEN7X19GliXQoBX0RERHplkor6kjNzGfH/jM0NNm4e2gIM2LCGRDc8qMIRbobBX0RERHpVi6V1/D57ny+PHgWmw1iRoQwPSaCvoE9HF2ayC2loC8iIiLdwoXSajZk5JFx+DwA94zqx7Tx4QT7ezu4MhHHUNAXERGRLu3spSo2ZFjYfeQCbq4m7r9zAA9FhxHYy8vRpYk4lIK+iIiIdEkFFytZn24h+5uLuLubmDJuIFPvDsPPx9PRpYl0Cgr6IiIi0qWcPmclJd3CvuOX8PJwZVpMOJPHDaRXDw9HlybSqSjoi4iISJdworCcdemnOXyqhJ5ebjx6zyAmjQ2lp5e7o0sT6ZQU9EVERKTTMgyDb/LLWL/rNN/kl+Hj7c73427jgbtC8fZUjBG5Fv0JERERkU7HMAxyTpewLt3CicJy/Hw8eOoBM3GjB+Dp4ero8kS6BAV9ERER6TQMw2D/iUukpFs4fa6CwF6ePD05kvvu6Ie7mwK+SFso6IuIiIjD2QyD7GNFrN9lobCokmB/L+Y9NITYEX1xczU5ujyRLklBX0RERBymyWYj6+hFUtItnCuupm9gD+ZPH8r44SG4mhTwRW6Ggr6IiIjcco1NNjIOn2fD7jwultYwILgnCx8ZztioPphMLo4uT6RbUNAXERGRW6ah0cZXh86xMSOPYmst4SG+JHxvJKMH98bkooAv0p4U9EVERKTD1TU0sXP/WT7PzKOssp7b+/di7oORjLwtCBcFfJEOoaAvIiIiHaamrpEv9p1hU1Y+1uoGogb689yMYQwND1DAF+lgCvoiIiLS7qprG9maXcDmPQVU1TYyfFAgM2MjiBzo7+jSRJyGgr6IiIi0m8qaBjbvKWBrdiE1dY2MNvdmemw4t/f3c3RpIk5HQV9ERERuWnlVPZuy8tm+9wx1DU2MiQpmZmwEYSG+ji5NxGkp6IuIiMgNK62o4/PMPHbuP0tDk43ooSFMjwlnQLCPo0sTcXoODfpVVVUkJSWRmpqK1WrFbDazaNEiJpxKxrYAACAASURBVE2adN2x+fn5LF68mMzMTGw2G2PHjuXVV1/FbDY365eUlEROTg45OTmUlJSQkJDASy+9dM256+vreeSRRzh16hT//u//zrx5827mNEVERLqdS+U1bNydz1cHz2KzQcyIEKbHRNA3sIejSxOR/+XQoJ+QkMCRI0f4+c9/TmhoKJ999hkJCQm8//77xMXFXXVccXExs2fPJigoiMTERFxdXVm+fDlz5swhOTmZvn372vuuWrWKqKgo4uPjWbNmTavqWrp0KRUVFTd9fiIiIt3NhdJqNmTkkXH4PAD3jurHQ+PDCfb3dnBlIvLPHBb0d+zYQXp6OkuXLmXy5MkAjB8/noKCAhYvXnzNoL9ixQqsVitr164lJCQEgNGjRzNp0iSWL1/OG2+8Ye+bnZ2NyWTCarW2KugfPXqUP/3pTyQmJvKzn/3sJs9SRESkezh7qYqUDAuZRy7g5mpi4p0DmBodRmAvL0eXJiJXYXLUB6elpeHr69tsm46LiwuzZs3i1KlTnDhx4qpjt2zZQmxsrD3kAwQEBDBx4kTS0tKa9TWZWn+KjY2N/PKXv+TJJ59k1KhRbTgbERGR7in/QgXLPjvEr/6Yyb7cSzw4Loy3FsYwe3KkQr5IJ+ewK/rHjx/HbDZfEcSjoqIAyM3NvWK/PUBtbS35+flMnTr1imNRUVGkpKRQXFxMUFBQm2tasWIFJSUl/PSnP6WsrKzN40VERLqL0+esrN9lYf+JS3h7ujI9NpzJYwfi28PD0aWJSCs5LOiXlZURERFxRbufn5/9eEvKy8sxDMPe77v8/f3tY9sa9E+dOsV7771HUlISPj4+CvoiIuKUjheWsX6XhcOnS+jp5caj9wwifmwoPbzcHV2aiLSRQ2/Gvdarr6/3Wuz2fG22YRi8/vrr3H///a164k9rBQU55tFiwcF6ZrEz0Do7B62zc3D0OhuGwcETl1idlsuhk5fw8/Hgh9OHMS02QgG/nTh6jeXW6Gzr7LCg7+/v3+JV8/LycoAWr9hfbndxcWlx7OW2y1f2W+uTTz7h6NGjfPrpp1itVgAqKysBqKurw2q14uPj06b9/gDFxZXYbEabxtys4GBfior0xKDuTuvsHLTOzsGR62wYBodPl7B+l4UTZ8rx8/HgqQfMxI0egKeHK1UVtVRV1Dqktu5Ef5adg6PW2WRyuerFZYcFfbPZzObNm7HZbM0CdG5uLgCRkZEtjvPy8mLgwIH2ft+Vm5tLYGBgm7ftHD9+nOrqaqZNm3bFsSVLlrBkyRI2btzI7bff3qZ5RUREOiPDMNh//BLr0y1YzlcQ2MuTOVMiuXdUP9zdXB1dnoi0E4cF/cmTJ/Ppp5+ybds24uPj7e3JyckMGjSoxRtxL4uPj+fjjz+mqKiI4OBg4Nur+du3b2f69OltrmXOnDnNagC4dOkS//Iv/8LTTz/Ngw8+SP/+/ds8r4iISGdisxlk5xaxfpeFwqJKgv29mPfQEGJH9MXN1WEP4hORDuKwoB8XF0d0dDSvvfYaZWVlhIaGkpycTHZ2NsuWLbP3mzt3LllZWRw7dszeNn/+fNatW8eCBQtYtGgRbm5uLF++HDc3NxYuXNjsc7KysigpKaG29ttfPZ44cYLU1FR7Dd7e3oSHhxMeHt5sXGFhIQBhYWFER0d3yHcgIiJyKzTZbGQduUhKhoVzxdX0DezBczOGEj0sBNc2bksVka7DYUHfxcWFZcuWsWTJEpKSkrBarZjNZpYuXcoDDzxwzbG9e/fm448/JjExkVdeeQXDMBgzZgwfffTRFVfe3333XbKysuw/p6am2oP+1q1bCQ0Nbf+TExER6QQam2ykHz7Pxow8LpbVEBrck4WPDGdsVB9MpvZ7qIWIdE4uhmHc2rtFnYhuxpWOonV2Dlpn59AR69zQ2MRXB8+xcXcexdY6wvv68nBsBHcM7o2pHZ9aJ62jP8vOQTfjioiISIepa2hix/6zpGbmUVZZz+0DejH3wSGMvC2wXR9LLSJdg4K+iIhIF1dT18j2fWfYlJVPRXUDQ8L8eX7GMIaEByjgizgxBX0REZEuqrq2gS3ZhaTtKaCqtpERgwKZERtB5MC2vU9GRLonBX0REZEupqK6nrSvC9iaXUhNXROjzb2ZERvBbf17Obo0EelEFPRFRES6iPKqejZl5bN97xnqG5oYExXMjNgIwkJ8HV2aiHRCCvoiIiKdXGlFHZ/vzmPHgbM0NtmIHhbC9JgIBvTu6ejSRKQTU9AXERHppC6V1bBxdx5fHTqHYUDM8L5MjwknJLCHo0sTkS5AQV9ERKSTuVBSzYaMPDJyzuPiAveM6s+06DB6+3s7ujQR6UIU9EVERDqJM5eq2JBuIfPoBdxcTUy8cwBTo8MI7OXl6NJEpAtS0BcREXGw/AsVrE+3sPdYER7urjx4dxgP3h2GX08PR5cmIl2Ygr6IiIiDnDpr5f11R8g6ch5vT1emx4YzeexAfHso4IvIzVPQFxERucVyC8pYn24h53QJvj3cefTeQcSPCaWHl7ujSxORbkRBX0RE5BYwDINv8kpZn27hm/wyevVw5/H7b+exyVFUVdQ6ujwR6YYU9EVERDqQYRgcOlXC+vTTnDxjxc/Hg6cmDSZudH883V3p4eWuoC8iHUJBX0REpAPYDIP9xy+xPt1C3vkKgnp5MndKJPeM6oe7m6ujyxMRJ6CgLyIi0o5sNoOvj10kJd1CYVEVffy9efahIcSM6Iubq8nR5YmIE1HQFxERaQdNNhuZRy6wISOPc8XV9AvqwfMzhnH3sD64mhTwReTWU9AXERG5CY1NNtIPn2dDhoWislpCg3148dERjIkMxmRycXR5IuLEFPRFRERuQENjE18ePMfnu/MottYR0deXp743mDsG98bkooAvIo6noC8iItIGdQ1N7Nh3hs+z8imvrMc8wI9npg5hxKBAXBTwRaQTUdAXERFphZq6RrbvO8OmrHwqqhsYEubPgpnDGRLmr4AvIp2Sgr6IiMg1VNc2sOXrQtK+LqCqtpERtwUyMzaCwaH+ji5NROSaFPRFRERaUFFdz+Y9BWzbW0hNXROjzb2ZOSGCQf16Obo0EZFWUdAXERH5jvLKOjZlFbB93xnqG5oYM6QPM2LCCQvxdXRpIiJtoqAvIiIClFhr+Twzn50HztLYZCN6WAjTYyIY0Luno0sTEbkhbQ76eXl55OXlcd9999nbDhw4wPLlyykrK2PWrFk8+eSTrZqrqqqKpKQkUlNTsVqtmM1mFi1axKRJk647Nj8/n8WLF5OZmYnNZmPs2LG8+uqrmM3mZv2SkpLIyckhJyeHkpISEhISeOmll5r1aWpqYuXKlXz11VccP34cq9VK//79mTp1KvPnz8fHx6dV5yMiIl1PUVkNG3fn8dXBcwDEjOjL9JhwQgJ6OLgyEZGb0+ag/7vf/Y6ysjJ70C8pKeH555+nuroaT09P/s//+T8EBQURHx9/3bkSEhI4cuQIP//5zwkNDeWzzz4jISGB999/n7i4uKuOKy4uZvbs2QQFBZGYmIirqyvLly9nzpw5JCcn07dvX3vfVatWERUVRXx8PGvWrGlxvtraWpYuXcqMGTN44oknCAgI4NChQyxbtoydO3eyevVq3Nz0yw8Rke7kQkk1KRkWMg5fwGSCe+/oz7ToMHr7ezu6NBGRdtHm9Hr48GGeeOIJ+88bNmygsrKS5ORkIiIieOaZZ1i5cuV1g/6OHTtIT09n6dKlTJ48GYDx48dTUFDA4sWLrxn0V6xYgdVqZe3atYSEhAAwevRoJk2axPLly3njjTfsfbOzszGZTFit1qsGfS8vL7Zu3UpAQIC9LTo6mqCgIH7xi1+wc+dOHnjgget/OSIi0umdKaokJSOPrKMXcHM18cCYATwUHU6Ar6ejSxMRaVemtg4oKSmhT58+9p+//PJL7rrrLiIjI/Hw8GDatGmcPHnyuvOkpaXh6+vbbJuOi4sLs2bN4tSpU5w4ceKqY7ds2UJsbKw95AMEBAQwceJE0tLSmvU1ma5/iq6urs1C/mUjR44E4Pz589edQ0REOre88xW899khfrUii/3HLzH17jDeejGW2fGRCvki0i21+Yq+t7c3FRUVwLd727Ozs5k7d679uJeXF5WVlded5/jx45jN5iuCeFRUFAC5ublX7LeHb7fZ5OfnM3Xq1CuORUVFkZKSQnFxMUFBQW06r5bs3r0bgMjIyJueS0REHOPUWSvrd53mwMlivD1dmREbwZRxA/Hxdnd0aSIiHarNQX/w4MH84x//4JFHHiE1NZXq6momTJhgP37mzBkCAwOvO09ZWRkRERFXtPv5+dmPt6S8vBzDMOz9vsvf398+9maDfkFBAe+88w533303Y8eOvam5RETk1sstKGN9uoWc0yX09HJj1r2DmDQmlB5eCvgi4hzaHPTnz5/Pj3/8Y2JjYwEYOnRosyC8a9cuhg0b1qq5rvXK8Ou9TrwjXzdeUlLCggUL8Pb25u23377heYKCHPO0nuBgPevZGWidnYPWuW0Mw+Dg8Uv8bcsxDp8sxt/Hk2dnDGNqTESnDvha5+5Pa+wcOts6tzno33///axcuZKtW7fi4+PDnDlz7KG7tLSUvn378uijj153Hn9//xav2peXlwO0eMX+cruLi0uLYy+3Xb6yfyNKS0uZN28eFRUV/M///E+zJ/i0VXFxJTabccPjb0RwsC9FRRW39DPl1tM6Owetc+sZhsGhU8Ws32Xh5Fkr/j4e/GDSYO4b3R9Pd1eqKmqpqqh1dJkt0jp3f1pj5+CodTaZXK56cfmGnhk5btw4xo0bd0V7QEAAS5cubdUcZrOZzZs3Y7PZmu3Tz83NBa6+L97Ly4uBAwfa+31Xbm4ugYGBN7xtp6ysjHnz5lFUVMSqVasYNGjQDc0jIiK3hs0w2Jd7iZR0C3kXKgjq5cXcB6O4Z2Q/3N3a/LwJEZFupV0eDt/Y2MjWrVspLy9n4sSJBAcHX3fM5MmT+fTTT9m2bVuzR3EmJyczaNCgFm/EvSw+Pp6PP/6YoqIi+2eVlZWxfft2pk+ffkPnUF5ezrPPPsv58+dZuXIlgwcPvqF5RESk49lsBl8fu8j6dAtniqro4+/Nsw8NIWZEX9xcFfBFROAGgv5bb71FZmYma9euBb79demzzz7L119/jWEY+Pv7s2bNGsLCwq45T1xcHNHR0bz22muUlZURGhpKcnIy2dnZLFu2zN5v7ty5ZGVlcezYMXvb/PnzWbduHQsWLGDRokW4ubmxfPly3NzcWLhwYbPPycrKoqSkhNrab39le+LECVJTU+01eHt7U1tby/z58/nmm294/fXXqa2tZf/+/fY5+vbte1NbeEREpH002WzszrnAhow8zpdU0y+oB8/PHMbdQ/vg2orHKYuIOJM2B/0vv/zSfiMuwLZt29izZw/PPfccQ4cO5T//8z/5wx/+wJtvvnnNeVxcXFi2bBlLliwhKSkJq9WK2Wxm6dKl1305Ve/evfn4449JTEzklVdewTAMxowZw0cffUT//v2b9X333XfJysqy/5yammoP+lu3biU0NJRLly5x6NAhAH7zm99c8XkJCQm89NJL1/5iRESkwzQ22Ug/fJ4NGRaKymoZ2MeHHz86gruigjF14MMZRES6MhfDMNp0t+i4ceP42c9+xuzZswF4/fXX2b17N1u2bAHg97//PevXr2fr1q3tX20Xo5txpaNonZ2D1hkaGpvYeeAcn2fmUWKtI6KvLzMnRDDa3LtDn752K2mduz+tsXPoFjfjNjQ04Orqav85MzOz2RX+gQMHUlRUdANlioiIfKuuvokv9p8hNTOf8qp6zKF+zJs6hOGDArtNwBcR6WhtDvp9+/Zl//79PPnkkxw/fpyCggJefvll+/Hi4mJ69OjRrkWKiIhzqKlrZNveQjZlFVBZ08DQ8AAWPDycIWH+CvgiIm3U5qA/ffp0li1bRklJCcePH8fHx4e4uDj78aNHj173RlwREZHvqqptYMvXhWz5uoCq2kZG3BbIw7GDMIe2/E4VERG5vjYH/RdeeIFz587ZX5iVmJhIr169AKioqGDbtm3MmzevvesUEZFuqKK6ns17CtiaXUhtfRN3Du7NjNgIBvXr5ejSRES6vDYHfQ8PD/7rv/6rxWM9e/bkq6++wsvL66YLExGR7qu8so7UrHy27ztDQ4ONMUP6MDM2goF9Wr6hTERE2q5dXph1mclkwtfXtz2nFBGRbqTEWsvnu/PZceAsTTYb44eFMD0mgv69ezq6NBGRbueGgn51dTV//OMfSUtLo7CwEIDQ0FCmTJnC/PnzdTOuiIg0U1RWw8bdeXx18BwAsSP6Mi0mnJAA/XshItJR2hz0y8rKePrppzl58iQBAQEMHToUAIvFwnvvvUdqaioff/wx/v7+7V6siIh0LedLqtmQbiEj5wImE9x3R38eGh9Gbz9vR5cmItLttTnov/POO5w6dYpf/epXPPXUU/Zn6jc1NbF69WrefPNNli5dyuuvv97uxYqISNdQWFTJhow8so5ewN3VxKQxoUyNDiPA19PRpYmIOI02B/1t27bx+OOP8/TTTzdrd3V1Zfbs2Rw9epQtW7Yo6IuIOKG88xWkpFvIzi3C08OVqdFhPDgujF49PRxdmoiI02lz0L906ZJ9u05Lhg0bxmeffXZTRYmISNdy8mw563dZOHiyGG9PN2bGRjB53EB8vN0dXZqIiNNqc9Dv3bs3R48everxo0eP0rt375sqSkREuoZj+aWkpFvIsZTS08uNWffdxqS7Qunh1a4PdRMRkRvQ5r+JJ06cyOrVqxk2bBhPPPEEJpMJAJvNxieffMLatWt58skn271QERHpHAzD4EheKet3WcgtKKNXD3cen3g7E+8cgJeHAr6ISGfhYhiG0ZYBpaWlPPXUU+Tn5xMYGMigQYMAOH36NCUlJYSFhfG3v/2NgICADim4KykursRma9PXe9OCg30pKqq4pZ8pt57W2Tl0tnU2DIODJ4tJSbdw8qyVAF9PpkaHEXdHfzzcXR1dXpfV2dZZ2p/W2Dk4ap1NJheCglp+2WCbL70EBASwdu1aPvzwQ7Zs2cKhQ4cAGDhwII899hjPP/88Pj56s6GISHdhMwz25V4iJd1C3oUKgnp58cyDUUwY2Q93N5OjyxMRkau4od+x+vj48LOf/Yyf/exnVxz729/+xqpVq9i4ceNNFyciIo5jsxns+eYiKRkWzhRV0SfAm2enDSFmeF/cXBXwRUQ6u3bfTFlaWsrp06fbe1oREblFGptsZB65QEpGHhdKqunfuycLZg5j3NA+uJoU8EVEugrdNSUiIsC3AX/XoXNsyMjjUnktA/v48ONHR3BXVDAmFxdHlyciIm2koC8i4uTqG5r48uA5Nu7Oo7SijkH9fJkdH8kd5iBcFPBFRLosBX0RESdVV9/E9n1n2JSVT3lVPYND/Xh22hCGRwQq4IuIdAMK+iIiTqamrpFtewvZlFVAZU0DQ8MDeOHh4USF+Svgi4h0I60K+n/6059aPeHevXtvuBgREek4VbUNpO0pYMvXhVTXNTLytiBmTojAPMDP0aWJiEgHaFXQT0xMbNOkuiIkItJ5WKvrSdtTwNbsQmrrm7hzcG9mToggom8vR5cmIiIdqFVBf9WqVR1dh4iItLOyyjpSM/P5Yv8ZGhpsjB3ShxmxEQzso5caiog4g1YF/bvvvruj6xARkXZSYq1l4+48dh44h81mED0shBmx4fQL6uno0kRE5BbSzbgiIt3ExbIaNmbksevQOQAmjOzLtPHh9Ano4eDKRETEERwa9KuqqkhKSiI1NRWr1YrZbGbRokVMmjTpumPz8/NZvHgxmZmZ2Gw2xo4dy6uvvorZbG7WLykpiZycHHJycigpKSEhIYGXXnqpxTkPHz7M22+/zYEDB3B3d+eee+7hF7/4BSEhIe1yviIiHeFccRUbMvLYnXMBk8mF+0b3Z1p0OEF+Xo4uTUREHMih7zJPSEhg/fr1/OQnP+GDDz7AbDaTkJDAjh07rjmuuLiY2bNnc+bMGRITE1myZAnl5eXMmTOH8+fPN+u7atUqKisriY+Pv+acJ0+eZO7cuRiGwf/9v/+X//zP/+TIkSPMnTuXqqqqmz5XEZH2VlhUyfv/OMzrH2by9TcXiR8bSuLCGOZOiVLIFxERx13R37FjB+np6SxdupTJkycDMH78eAoKCli8eDFxcXFXHbtixQqsVitr1661X20fPXo0kyZNYvny5bzxxhv2vtnZ2ZhMJqxWK2vWrLnqnO+88w49e/bk/fffp0ePb3/NPXjwYGbMmMHHH3/MggUL2uO0RURuWt75CtanW9ibW4SnhytTx4fx4LgwevX0cHRpIiLSiTjsin5aWhq+vr7Ntum4uLgwa9YsTp06xYkTJ646dsuWLcTGxjbbUhMQEMDEiRNJS0tr1tdkuv4pNjQ08MUXXzB16lR7yAe4/fbbueOOO9i8eXNbTk1EpEOcPFPO7z85wBt/3sPRvFIenhDB2y/G8vj9ZoV8ERG5gsOu6B8/fhyz2XxFEI+KigIgNzf3iv32ALW1teTn5zN16tQrjkVFRZGSkkJxcTFBQUGtrqWgoIDa2loGDx7c4pzJycmtnktEpL0dyy9lfbqFI5ZSfLzd+d59t/HAXaH08NLzFERE5Ooc9q9EWVkZERERV7T7+fnZj7ekvLwcwzDs/b7L39/fPrYtQf/yZ11tztraWmpra/Hy0p5XEbk1DMNg37GLfLTxCLmF5fTq6cETE83cf2d/vDwU8EVE5Poc+q/Ftd6ge72363bE23dvpp6WBAU55qU0wcG+DvlcubW0zt2TYRjsOXqBNWm5HMsvJcjPiwWPjmTK+HA83V0dXZ50EP157v60xs6hs62zw4K+v79/i1fty8vLgZavrl9ud3FxaXHs5bbLV/bbUst3x//znF5eXnh6erZpToDi4kpsNqPN425GcLAvRUUVt/Qz5dbTOnc/NsNgX24R69Mt5F+opLefF4seu4NREQG4u5mwllU7ukTpIPrz3P1pjZ2Do9bZZHK56sVlhwV9s9nM5s2bsdlszfbp5+bmAhAZGdniOC8vLwYOHGjv9125ubkEBga2adsOwMCBA/Hy8uL48eMtztnS3n0RkfZgsxlkfXOBDel5nLlURUiANz+aNpTxw0Po19dP4UBERG6Yw566M3nyZKxWK9u2bWvWnpyczKBBg1q8Efey+Ph40tPTKSoqsreVlZWxfft2+6M628Ld3Z24uDg2bdpETU2Nvf306dPs37+fKVOmtHlOEZFraWyy8dXBc7z24W7+sO4IBrDg4WH89vnx3DOqH26uDn3NiYiIdAMOu6IfFxdHdHQ0r732GmVlZYSGhpKcnEx2djbLli2z95s7dy5ZWVkcO3bM3jZ//nzWrVvHggULWLRoEW5ubixfvhw3NzcWLlzY7HOysrIoKSmhtrYWgBMnTpCammqvwdvbG4CXX36Zxx9/nBdffJEf/ehH1NTUkJSUxIABA5g9e3ZHfx0i4iQaGm3sOnyOjRl5XCqvJayPDz9+dAR3RQVj6oB7j0RExHm5GIZxazeRf0dlZSVLlixh06ZNWK1WzGYzixYtavYW25aCPoDFYiExMZHMzEwMw2DMmDG8+uqrV2yzuTy+JVu3biU0NNT+88GDB/nd737HwYMHcXNzY8KECfziF7+gX79+N3R+2qMvHUXr3PXUNzSx88BZPs/Mp7SijkH9ejFzQgR33B501Zv9tc7OQevc/WmNnUNn3KPv0KDf3SnoS0fROncdtfWNfLHvLJuy8imvqicy1I+ZEwYxLCLguk/z0jo7B61z96c1dg6dMejrYcwiIh2gpq6RrdmFbN5TQGVNA0PDA1j4yHCiwgIcXZqIiDgJBX0RkXZUWdPAlq8L2PJ1IdV1jYy6PYgZsRGYB7T8yGAREZGOoqAvItIOrFX1bN5TwLa9hdTWN3FXZDAzYsOJ6NvL0aWJiIiTUtAXEbkJZZV1pGbm88W+MzQ02hg3tA8zYiII7eOYN2OLiIhcpqAvInIDistr+Twzj50HzmGzGYwfHsL0mHD6BfV0dGkiIiKAgr6ISJtcLKthY4aFXYfOAzBhZD+mxYTTx9/bsYWJiIj8EwV9EZFWOFdcxYaMPHbnXMBkciFudH8eig4nyM/L0aWJiIi0SEFfROQaCi9WkpJhYc/Ri7i7m4gfG8rU6DD8fTwdXZqIiMg1KeiLiLTAct7K+l0W9h2/hKeHKw+ND2fK3QPp1cPD0aWJiIi0ioK+iMh3nDhTTkq6hYMni+nh6cbDEyKIHzsQH293R5cmIiLSJgr6IiLAsfxS1u2ycDSvFB9vd74fdxsT7wylh5f+mhQRka5J/4KJiNMyDIMcSwkpuyzkFpbTq6cHT0w0M/HOAXh6uDq6PBERkZuioC8iTscwDA6cLGb9Lgunz1kJ8PXk6cmR3DuqHx7uCvgiItI9KOiLiNOwGQZ7jxX9v/buPSrKet8f+HuG4SIJchHBAEEdgQQNQuViggqEqV1ot9MtoGenFgpZtu3oWaxOv3Z6tpZHLQ20bO2jp9pmGhhkIiqpOQiloikpKHJTURwYRmC4zvP7o8WciLEQGJ5heL/WarX8Pt/vPJ+Hj8ibZ74zg0xFKcrv1GP4MCssnOWNqX4jYS6Til0eERFRn2LQJyKTp9UKyP/5NjJzy3DzbgOcHayxeM4jCBrvDJkZAz4REZkmBn0iMllt7VqcvnQb3+SW4natBq7DH8LLT/tiss8ISKUSscsjIiIyKAZ9IjI5rW1anPrpFg6eLsPduiaMch6KxBg/BHg5QSphwCciosGBQZ+ITEZLazuOn7+JQ3nlqL3XjDEP2yI2ygsTxzpCwoBPRESDDIM+EQ14TS1t+O7cTRzKL4e6oQVebsPw4uxHMN7TngGfiIgGLQZ9IhqwGpvacPRsjhYNywAAIABJREFUJbJ/qEC9phXjPe3x1DO+8B5lL3ZpREREomPQJ6IBp17TiiM/VuDIj5VobG7DxLGOeCrUE2Ndh4ldGhERkdFg0CeiAUPd0IKsH8px7OwNNLe04zEvJzwV6gkPFxuxSyMiIjI6DPpEZPRq7zUjK78c3527gdY2LSY/MgJzQz3h5jRU7NKIiIiMFoM+ERmtu3UafJtXjpPnb0GrFRDi64zZIR4Y6fiQ2KUREREZPQZ9IjI6d2ob8U1uGRQXqwAAUyeMxOwQD4ywGyJyZURERAMHgz4RGY1bygZkKsqQV3gbUqkE0/1d8WTwKDjYWoldGhER0YAjatBvaGjA5s2bcejQIajVasjlciQmJiIiIuIP15aXl2P9+vXIy8uDVqvFpEmTsHr1asjl8i5zd+/ejc8++ww3btyAi4sL5s2bh8WLF0MqlXaal5WVhX/+85+4du0aAGDMmDFYtGgRZs+e3TcXTER6Vd6pR4aiFD9evgNzcymiJrshesoo2A21FLs0IiKiAUvUoJ+UlITCwkKsWrUKbm5uSEtLQ1JSErZv347w8PD7rlMqlViwYAEcHR2xYcMGmJmZITU1FXFxcUhPT4eLi4tubkpKCrZu3YqEhAQEBwfj3Llz2LJlC+rq6rBq1SrdvLS0NKxZswbR0dFYtmwZAGD//v1YuXIlGhsb8fzzzxvuC0E0SJVWqZFxqhTniu/CysIMs0M8EDXZHbbWFmKXRkRENOCJFvSPHz8OhUKBbdu2ISoqCgAQHByMiooKrF+//neD/ieffAK1Wo39+/fD2dkZAODv74+IiAikpqbi7bffBgDU1tZi+/btiI2NxauvvgoACAoKgkajwc6dOxEXF6f7peCrr76Cq6srtmzZorvTP23aNERGRuLAgQMM+kR96OqNOmScKsVPJUpYW8rwzOOjERHohqFDzMUujYiIyGRI/3iKYWRnZ8PGxqbTNh2JRIKYmBiUlJTg6tWr91175MgRhIaG6kI+ANjb22PGjBnIzs7WjZ08eRLNzc2IiYnptD4mJgZtbW04evSobkwmk8Ha2rrTdh6pVApra2tYWPDuIlFvCYKAy2W1eO9f5/Bf/3sG12+p8afwMXhveSieeXw0Qz4REVEfEy3oFxcXQy6Xd9kn7+3tDQAoKirSu66pqQnl5eXw8vLqcszb2xtKpRJKpVJ3DolEgnHjxnWa5+npCSsrKxQXF+vGYmNjce3aNaSmpqKmpgY1NTVITU3F9evXsWjRol5dK9FgJggCLpYosf6zs3j3X+dw824D5s2U471loZgT4okhlnxPACIiIkMQ7SesSqWCp6dnl/Fhw4bpjutTV1cHQRB0837Nzs5Ot9bR0REqlQpDhgzRe0fe1ta20zkiIyORmpqKN954A1u2bAEAWFtb4/3330dYWNgDXx/RYCcIAs5fVSJDUYrrt9RwsLVEbJQXpk0cCQtzM7HLIyIiMnmi3kqTSCQ9Otad4w96/lOnTuFvf/sb5syZg+joaLS3tyMjIwOvv/46PvjgA0yfPv2BH9/RUZxP7XRyshHlvNS/jLXPWq2A3J9u4YsjV3D9phrODtZI+vOjmDlpFMxloj2JOGAZa5+pb7HPpo89HhyMrc+iBX07Ozu9d+3r6uoAQO8d+45xiUSid23HWMedfTs7O2g0GrS0tHS5q69Wq3XnEAQBq1evRnBwMP7+97/r5oSFhaGqqgrvvPNOj4K+UlkPrVZ44HW94eRkg+rqe/16Tup/xtjndq0W+T/fQaaiFLeUjXB2sMbiOY8gaLwzZGZSqGobxC5xwDHGPlPfY59NH3s8OIjVZ6lUct+by6IFfblcjsOHD0Or1Xbap9+xN1/fHnwAsLKygru7u949/EVFRXBwcICjo6PuHIIgoLi4GL6+vrp5ZWVlaGpq0u3dv3v3Lqqrq+Hn59flMf38/JCfn4/m5mZYWvI9vYl+q61di9xLVfgmtwx3ajVwHf4QXn7aF5N9RkAq7f0zb0RERNQzoj2PHhUVBbVajWPHjnUaT09Px+jRo/V+8FWHyMhIKBQKVFdX68ZUKhVycnJ0b9UJ/HJH3sLCAgcOHOi0Pi0tDTKZDDNnzgTwy7MElpaWuHDhQpdznT9/HnZ2dgz5RL/R2qZFzrkb+I8dp/HPg5dhZWGGxJgJeHvxFASNd2bIJyIiEplod/TDw8MRFBSE5ORkqFQquLm5IT09HWfOnEFKSopuXnx8PPLz83HlyhXd2OLFi/H111/jpZdeQmJiImQyGVJTUyGTyZCQkKCbZ29vj5dffhkpKSmwsbFBUFAQCgoKsHPnTixcuBAjR44EAFhYWGD+/PnYtWsXkpOTER0dDa1Wq6vntdde678vDJGRa25tx4nzN3Eorxy195ox9mFbxEd7YcIYxz557QwRERH1DYkgCP27ifxX6uvrsWnTJmRlZUGtVkMulyMxMRGRkZG6OfqCPgCUlpZiw4YNyMvLgyAICAwMxOrVq7u8laYgCNi1axc+//xz3Lx5EyNGjMC8efOwdOnSTluG2tvb8eWXX2Lv3r0oLy+HVCqFp6cnYmNj8fTTT/cowHCPPhmKGH1uamlDzrkbyMorh7qxFV7udnhqqifGe9gz4BsIv58HB/bZ9LHHg4Mx7tEXNeibOgZ9MpT+7HNjUxuOnq1E9g8VqNe0wtfTHnNDPeE9yr5fzj+Y8ft5cGCfTR97PDgYY9DnJ9UQkV71mlZk/1CBI2cqoWluw6NjHTF3qifGPqz/HbGIiIjIuDDoE1En6oYWZOWX49i5G2huaUeglxPmhnrCw8W43huYiIiIfh+DPhEBAGrvNeNQXjmOF9xAa7sWUx5xxtwQD7g6ifPBb0RERNQ7DPpEg9zdOg2+PV2OkxduQqsFQvycMSfEEy4O1mKXRkRERL3AoE80SN2ubcQ3uWXIvVgFAHh84kjMDvaAk90QkSsjIiKivsCgTzTI3LzbgG9yS3G68DZkZlJMD3DFk0Gj4GBrJXZpRERE1IcY9IkGiYo79chQlOLM5TswN5cievIoRE9xx7Ch/NRnIiIiU8SgT2Tirt9SI1NRinPFd2FlYYbZIR54YrI7bKwtxC6NiIiIDIhBn8hEXa2sw9eK67hYUoOHrGR49vHRiJjkhoeszMUujYiIiPoBgz6RCREEAZfLVcg4dR2Xy1UYOsQcfwofg5mPuWGIJb/diYiIBhP+5CcyAYIg4OL1GmQoSnG1sg7Dhlpg/kw5wv1dYWlhJnZ5REREJAIGfaIBTBAEFFy9i0xFKa7fugcHW0vERnkh7NGRMJcx4BMREQ1mDPpEA5BWK+CHy3eQcaoUldX1cLKzwr896YNQPxfIzKRil0dERERGgEGfaABp12qR//MdHMovR8Xterg4WGPJ3EcQNN4ZZlIGfCIiIvo/DPpEA0Bbuxa5F6vwzeky3KnVwMPFBgnP+GKS9whIpRKxyyMiIiIjxKBPZMRa27T4/qdbOJhbBqW6CR7ONkh6bgKiQkZDqawXuzwiIiIyYgz6REaoubUdJwpu4tu8MqjqWzD2YVvER3thwhhHSCQS3sUnIiKiP8SgT2RENM1t+O7cDWTll0Pd2ApvdzssmTsej3jYQyJhuCciIqLuY9AnMgKNTa04eqYSh3+oQENTG3xHO+CpUE94uduJXRoRERENUAz6RCKq17Ti8A8VOHqmAprmdvjLh2NOqAfGPjxM7NKIiIhogGPQJxJBXUMLsvLLkXP2Bppb2xHo7YSnQj0xytlG7NKIiIjIRDDoE/Wj2nvN+DavDCcKbqK1XYugR5wxJ8QDrk5DxS6NiIiITAyDPlE/uFunwcHT5fj+wk0IAhDi64LZIR5wcbAWuzQiIiIyUQz6RAZ0u7YR3yjKkHupCgAwbeJIzA72wHC7ISJXRkRERKaOQZ/IAG7ebUBmbinyCm9DZibFjABXzAoaBQdbK7FLIyIiokFC1KDf0NCAzZs349ChQ1Cr1ZDL5UhMTERERMQfri0vL8f69euRl5cHrVaLSZMmYfXq1ZDL5V3m7t69G5999hlu3LgBFxcXzJs3D4sXL4ZUKu00TxAE7N27F1988QWuXbsGc3NzjBkzBmvWrMFjjz3WZ9dNpqv89j1kKkpx5ko1LMzNED15FKKnuGPYUEuxSyMiIqJBRtSgn5SUhMLCQqxatQpubm5IS0tDUlIStm/fjvDw8PuuUyqVWLBgARwdHbFhwwaYmZkhNTUVcXFxSE9Ph4uLi25uSkoKtm7dioSEBAQHB+PcuXPYsmUL6urqsGrVqk6Pm5ycjMOHD2PJkiUICAiARqPBxYsXodFoDPY1INNw/ZYaGadKUXD1LoZYmmFOqAeiJrnDxtpC7NKIiIhokBIt6B8/fhwKhQLbtm1DVFQUACA4OBgVFRVYv3797wb9Tz75BGq1Gvv374ezszMAwN/fHxEREUhNTcXbb78NAKitrcX27dsRGxuLV199FQAQFBQEjUaDnTt3Ii4uTvdLQVZWFtLS0vD5558jICBAd67p06cb4vLJRBRXqpBxqhQXr9fgISsZnp02GpGBbrC2Mhe7NCIiIhrkpH88xTCys7NhY2PTaZuORCJBTEwMSkpKcPXq1fuuPXLkCEJDQ3UhHwDs7e0xY8YMZGdn68ZOnjyJ5uZmxMTEdFofExODtrY2HD16VDf26aefYtKkSZ1CPpE+giDg59IavPv5Wfzj07Mou30Pz08fi3eXheLpqaMZ8omIiMgoiBb0i4uLIZfLu+yT9/b2BgAUFRXpXdfU1ITy8nJ4eXl1Oebt7Q2lUgmlUqk7h0Qiwbhx4zrN8/T0hJWVFYqLiwEAra2tKCgogLe3NzZt2oTQ0FCMHz8ec+bMQVpaWq+vlUyDIAj4qUSJf3x6Fu/tKcCtmkbMjxiHdxNCMTvYA0Ms+dp2IiIiMh6iJROVSgVPT88u48OGDdMd16eurg6CIOjm/ZqdnZ1uraOjI1QqFYYMGQILi677pG1tbXXnUKlUaGlpQVpaGlxcXPDmm2/C1tYW+/btw5o1a9Da2ooXXnihp5dKA5wgCCgovosMRSlKq+7BwdYScU94YdrEkTCXmYldHhEREZFeot6ClEgkPTrWneMPcn6tVgsAaG5uxkcffQRXV1cAQGhoKCoqKvDhhx/2KOg7OorzaadOTjainNfUtGsF5P50E19kF6H0lhoujtZ45QV/zAh0h7lMtCfDdNjnwYF9HhzYZ9PHHg8OxtZn0YK+nZ2d3rv2dXV1AKD3jn3HuEQi0bu2Y6zjzr6dnR00Gg1aWlq63NVXq9W6c3Q85pgxY3QhH/jlF4Fp06YhJSUFSqUSjo6OD3SNSmU9tFrhgdb0lpOTDaqr7/XrOU1Nu1aL/MI7yMwtxS1lI1wcrLFk7iMIGu8MM6kUqtoGsUtknwcJ9nlwYJ9NH3s8OIjVZ6lUct+by6IFfblcjsOHD0Or1Xbap9+xN1/fHnwAsLKygru7u949/EVFRXBwcNAFcrlcDkEQUFxcDF9fX928srIyNDU16fbuW1lZwcPDQ+/5BOGXoN4XzyCQcWtr10JxsQoHc8twR6WBm9NDSHjGF5O8R0AqZf+JiIhoYBFt/0FUVBTUajWOHTvWaTw9PR2jR4/W+8FXHSIjI6FQKFBdXa0bU6lUyMnJ0b1VJwCEhYXBwsICBw4c6LQ+LS0NMpkMM2fO7FRPSUkJKisrdWOCIODEiRNwd3eHg4NDj6+VjFtrWztyzlbiP3bk4n++vYwhVjK88twE/L8Xp2DKI84M+URERDQgiXZHPzw8HEFBQUhOToZKpYKbmxvS09Nx5swZpKSk6ObFx8cjPz8fV65c0Y0tXrwYX3/9NV566SUkJiZCJpMhNTUVMpkMCQkJunn29vZ4+eWXkZKSAhsbGwQFBaGgoAA7d+7EwoULMXLkyE6PmZGRgSVLliApKQk2NjbYv38/Ll26hM2bN/fPF4X6VXNrO44X3MShvDKo6lsw1tUW8dE+mDDGgc/gEBER0YAnETr2poigvr4emzZtQlZWFtRqNeRyORITExEZGamboy/oA0BpaSk2bNiAvLw8CIKAwMBArF69ustbaQqCgF27duHzzz/HzZs3MWLECMybNw9Lly7t8taelZWVePfdd5Gbm4umpiZ4eXlh2bJlnep5ENyjb5w0zW3IOXcDWfnluNfYCp9Rdngq1BM+HvYDJuCzz4MD+zw4sM+mjz0eHIxxj76oQd/UMegbl8amVhw5U4nsHyrQ0NQGv9EOmBvqCS93O7FLe2Ds8+DAPg8O7LPpY48HB2MM+vyEHzJ59xpbkP1jBY6eqYSmuR3+8uGYG+qJMQ/bil0aERERkcEw6JPJqqtvRlZ+BXLO3UBLazsCvZ0wN9QTo5yN6z1uiYiIiAyBQZ9MTu29Znx7ugzHz99EW7sWQeOdMSfEE67DHxK7NCIiIqJ+w6BPJuOuSoODp8vw/U+3IAhAiK8L5oR4wNnBWuzSiIiIiPodgz4NeLdrGvFNbhlyL1VBIgEen/gwZgeNwnC7IWKXRkRERCQaBn0asG7cbcA3ilLk/XwbMjMpZjzmillTRsHB1krs0oiIiIhEx6BPA0757XvIUJTi7JVqWJibIXrKKERPGYVhD1mIXRoRERGR0WDQpwGj5KYamYpSFFy9iyGWZpgT6omoSW6wsWbAJyIiIvotBn0yekUVKmQoSnHpeg0espLh2WmjERnoBmsrc7FLIyIiIjJaDPpklARBwM9ltcg4VYorFSrYWpvjz9PHYnqAK4ZY8q8tERER0R9hYiKjIggCfiqpQYbiOq7dUGPYUAvMjxiHcP+HYWluJnZ5RERERAMGgz4ZBa0goKD4LjIUpSirugdHW0vEP+GFxyeOhLmMAZ+IiIjoQTHok6i0WgE/XrmDTEUpKqsbMMJuCP76pA9C/FwgM5OKXR4RERHRgMWgT6Jo12qRV3gb3+SW4ZayESMdrbF07nhMGT8CZlIGfCIiIqLeYtCnftXWroXiYhW+yS1FtaoJbk5DsexZPwR6OUEqlYhdHhEREZHJYNCnftHa1o6TF27h29NlUKqb4elig/l/GodH5cMhlTDgExEREfU1Bn0yqObWdhw/dwPf5pejrr4FctdhWDjLB36jHSBhwCciIiIyGAZ9MghNcxuOna3E4R8qcK+xFT6j7PDSU77wGWXHgE9ERETUDxj0qU81NrXiyI+VyP6xAg1NbfAb44CnQj0xzs1O7NKIiIiIBhUGfeoT9xpbcPiHChw7WwlNczv85cPx1FRPjB5pK3ZpRERERIMSgz71Sl19M7LyK5Bz7gZaWtsR6DMCc0M8MMrZRuzSiIiIiAY1Bn3qkRp1E77NK8eJ8zfR1q5F8HhnzA7xhOvwh8QujYiIiIjAoE8PqFqlwcHTZfj+wi0AQIifC+aEeMDZ3lrkyoiIiIjo1xj0qVtu1zQiM7cUuRdvQyoFwh59GE8GjcJwuyFil0ZEREREejDo0++6UV2PzNwy5P98GzIzKWYGuuLJIA/Y21iKXRoRERER/Q4GfdKrrOoeMhWlOFNUDUtzM8yaMgpPTBmFYQ9ZiF0aEREREXWDVMyTNzQ0YO3atXj88ccxceJEPPfcczh69Gi31paXl2P58uUIDAxEQEAAli5diqtXr+qdu3v3bkRHR8PPzw+RkZH4+OOPodVq7/vYgiBg4cKF8Pb2xrp163p0bQNVyU013v/yPN7+nx9QWFaDuaGeeG95KP48Q86QT0RERDSAiHpHPykpCYWFhVi1ahXc3NyQlpaGpKQkbN++HeHh4fddp1QqsWDBAjg6OmLDhg0wMzNDamoq4uLikJ6eDhcXF93clJQUbN26FQkJCQgODsa5c+ewZcsW1NXVYdWqVXoff+/evSgpKenz6zVmRRUqZJy6jkultXjISoaYaaMREegGaytzsUsjIiIioh4QLegfP34cCoUC27ZtQ1RUFAAgODgYFRUVWL9+/e8G/U8++QRqtRr79++Hs7MzAMDf3x8RERFITU3F22+/DQCora3F9u3bERsbi1dffRUAEBQUBI1Gg507dyIuLq7TLwUAcPv2bbz33ntYt24dVqxYYYhLNxqCIODnslpknCrFlQoVbK3N8ecZYzEjwBVWFtzVRURERDSQibZ1Jzs7GzY2NoiIiNCNSSQSxMTEoKSk5L7bcADgyJEjCA0N1YV8ALC3t8eMGTOQnZ2tGzt58iSam5sRExPTaX1MTAza2tr0bhN66623MGnSJERHR/fm8oyaIAi4cO0u/ut/z2DjngLcrm3EXyLGYcOyUDwZ5MGQT0RERGQCREt0xcXFkMvlkEo7/67h7e0NACgqKoJcLu+yrqmpCeXl5Zg1a1aXY97e3sjMzIRSqYSjoyOKi4shkUgwbty4TvM8PT1hZWWF4uLiTuOZmZnIy8vDwYMHe3t5RkkrCDhXdBeZilKU3b4HR1srxEd74/EJI2EuE/XlGkRERETUx0QL+iqVCp6enl3Ghw0bpjuuT11dHQRB0M37NTs7O91aR0dHqFQqDBkyBBYWXV9Eamtr2+kcNTU1WLduHVauXImRI0f25JKMllYr4Mcrd5ChKMWN6gaMsB+Cv872QYivC2RmDPhEREREpkjUPRoSiaRHx7pz/EHPv27dOri5uSEuLq7Xj9vB0XFonz3WH/nuTAV2f/sz7tZqMNx+CBY++Qim+bvi+LlK7D1SjBvV9XB3Hoq/LXgM0/xdYcaAP+A5OdmIXQL1A/Z5cGCfTR97PDgYW59FC/p2dnZ679rX1dUBgN479h3jEolE79qOsY47+3Z2dtBoNGhpaelyV1+tVuvOcerUKRw8eBC7du1CfX19p3ktLS1Qq9WwtraGTPZgXy6lsh5arfBAa3oi91IVdn17GS1tv7xlaHWtBlv2nMNHaRdwT9MG9xFDsfxZPzzm7QSpRIKamgaD10SG5eRkg+rqe2KXQQbGPg8O7LPpY48HB7H6LJVK7ntzWbSgL5fLcfjwYWi12k779IuKigAAXl5eetdZWVnB3d1dN+/XioqK4ODgAEdHR905BEFAcXExfH19dfPKysrQ1NSk27tfXFwMrVaL+Pj4Lo+5Z88e7NmzBx9//DHCwsJ6fsEG9NXxa7qQ36FdK6CxuR2v/GkC/OXD++QZECIiIiIaOEQL+lFRUdi3bx+OHTuGyMhI3Xh6ejpGjx6t94W4HSIjI/HZZ5+huroaTk5OAH65m5+Tk4M5c+bo5oWFhcHCwgIHDhzoFPTT0tIgk8kwc+ZMAMCsWbPwyCOPdDnPwoULER0djdjYWN2LhI2RUt2sd7xdKyBgnFM/V0NERERExkC0oB8eHo6goCAkJydDpVLBzc0N6enpOHPmDFJSUnTz4uPjkZ+fjytXrujGFi9ejK+//hovvfQSEhMTIZPJkJqaCplMhoSEBN08e3t7vPzyy0hJSYGNjQ2CgoJQUFCAnTt3YuHChboX3bq4uHR5P/0Ozs7OCAoKMtBXoW842lrqDfuOtpYiVENERERExkC0oC+RSJCSkoJNmzZh8+bNUKvVkMvl2LZtm+5O+/0MHz4cn332GTZs2IB///d/hyAICAwMxKeffoqHH36409zExEQMHToUn3/+OXbs2IERI0bglVdewdKlSw15ef3qufCxnfboA4CFTIrnwseKWBURERERiUkiCILhXy06SPXXi3GBX16Q+9Xxa6hRN8PB1hLPhY9FiK/+Zylo4OMLuwYH9nlwYJ9NH3s8OPDFuGQwIb4uCPF14T8mRERERAQA4JupExERERGZIAZ9IiIiIiITxKBPRERERGSCGPSJiIiIiEwQgz4RERERkQli0CciIiIiMkEM+kREREREJohBn4iIiIjIBDHoExERERGZIH4yrgFJpZJBdV7qX+zz4MA+Dw7ss+ljjwcHMfr8e+eUCIIg9GMtRERERETUD7h1h4iIiIjIBDHoExERERGZIAZ9IiIiIiITxKBPRERERGSCGPSJiIiIiEwQgz4RERERkQli0CciIiIiMkEM+kREREREJohBn4iIiIjIBDHoDwANDQ1Yu3YtHn/8cUycOBHPPfccjh492q215eXlWL58OQIDAxEQEIClS5fi6tWrBq6YeqKnff7yyy+RkJCAGTNmYOLEiXjiiSewdu1a1NTU9EPV9KB68/3cQRAELFy4EN7e3li3bp2BKqXe6E2fBUHAF198geeeew6PPvooJk2ahBdeeAFnz541cNX0IHrT46ysLMyfPx+TJ0/G5MmTMW/ePBw8eNDAFVNPVFVVYe3atfjLX/6CgIAAeHt7Iy8vr9vrL168iEWLFsHf3x+TJ0/GypUrcfv2bQNW3BmD/gCQlJSEjIwMvPrqq9ixYwfkcjmSkpJw/Pjx312nVCqxYMEC3LhxAxs2bMCmTZtQV1eHuLg4VFVV9VP11F097fMHH3yAoUOH4vXXX8fOnTvxb//2b/j222/x/PPPQ61W91P11F097fOv7d27FyUlJQasknqrN31OTk7Ge++9hyeeeAIfffQRNm7ciLCwMGg0mn6onLqrpz1OS0vDihUrMGLECGzcuBEbN26Es7MzVq5ciX379vVT9dRdZWVl+Oabb2BtbY3g4OAHWnvt2jXEx8dDEAS8//77eOedd1BYWIj4+Hg0NDQYqOLfEMiofffdd4KXl5dw+PBh3ZhWqxXmz58vzJo163fXbtiwQZgwYYJQVVWlG6upqRECAgKE//zP/zRYzfTgetPnu3fvdhnLy8sTvLy8hN27d/d5rdRzvelzh6qqKiEwMFA4dOiQ4OXlJaxdu9ZQ5VIP9abPhw4dEnx8fISzZ88aukzqhd70OC4uTpgxY4bQ3t6uG2tvbxdmzJghxMXFGaxm6plf9yk7O1vw8vISTp8+3a21K1asEKZOnSo0NDToxq5evSr4+PgIO3bs6PNa9eEdfSOXnZ0NGxsbRERE6MYkEgliYmJQUlLyu9twjhw5gtDQUDg7O+vG7O3exJ00AAALRklEQVTtMWPGDGRnZxu0bnowvemzo6Njl7EJEyYAAJ+5MTK96XOHt956C5MmTUJ0dLQhS6Ve6E2fP/30U0yaNAkBAQH9USr1UG96LJPJYG1tDan0/yKYVCqFtbU1LCwsDFo3Pbhf9+lBtLa24rvvvsOsWbNgbW2tGx87diweffRRHD58uK9K/F0M+kauuLgYcrm8y180b29vAEBRUZHedU1NTSgvL4eXl1eXY97e3lAqlVAqlX1fMPVIT/t8P6dPnwYAjBs3rm8KpD7R2z5nZmYiLy8Pb731lsFqpN7raZ9bW1tRUFAAb29vbNq0CaGhoRg/fjzmzJmDtLQ0g9dN3deb7+XY2Fhcu3YNqampqKmpQU1NDVJTU3H9+nUsWrTIoHVT/6moqEBTU5Pen8Pe3t4oLi7ulzpk/XIW6jGVSgVPT88u48OGDdMd16eurg6CIOjm/ZqdnZ1urb67wdT/etrn+z3W2rVr4enpidmzZ/dVidQHetPnmpoarFu3DitXrsTIkSMNVSL1gZ72WaVSoaWlBWlpaXBxccGbb74JW1tb7Nu3D2vWrEFrayteeOEFQ5ZO3dSb7+XIyEikpqbijTfewJYtWwAA1tbWeP/99xEWFmaQeqn/dfwduF8Oa2pqQlNTE6ysrAxaB4P+ACCRSHp0rDvHyXj0ps8dNBoNEhMTUVdXh08//ZRPAxuhnvZ53bp1cHNzQ1xcnCHKoj7Wkz5rtVoAQHNzMz766CO4uroCAEJDQ1FRUYEPP/yQQd+I9PR7+dSpU/jb3/6GOXPmIDo6Gu3t7cjIyMDrr7+ODz74ANOnTzdAtSSWvvjZ3hsM+kbOzs5O752Buro6APp/U+wYl0gketd2jHXc2Sfx9bTPv9bU1IRly5ahsLAQn3zyCXx8fPq8Tuqdnvb51KlTOHjwIHbt2oX6+vpOx1paWqBWq2FtbQ2ZjP+kG4Pe/rs9ZswYXcgHfgkD06ZNQ0pKCpRKJZ+JNQI97bEgCFi9ejWCg4Px97//XTceFhaGqqoqvPPOOwz6JuLXuyd+S6VSwcrKCpaWlgavg3v0jZxcLse1a9d0d3o6dOz/07cHHwCsrKzg7u6ud59gUVERHBwc+MPCiPS0zx2am5uxfPlyFBQUYMeOHXjssccMViv1XE/7XFxcDK1Wi/j4eN37bk+ePBkAsGfPHkyePBkKhcKwxVO39ebfbQ8PD73HBEEAwGdpjUVPe3z37l1UV1fDz8+vyzE/Pz9UVlaiubm57wumfufu7g4rKyu9e/GLior67TV0DPpGLioqCmq1GseOHes0np6ejtGjR0Mul993bWRkJBQKBaqrq3VjKpUKOTk5iIqKMljN9OB60+eWlhYsX74cP/74I1JSUjBlyhRDl0s91NM+z5o1C7t37+7yHwBER0dj9+7dmDhxosHrp+7pzfdzVFQUSkpKUFlZqRsTBAEnTpyAu7s7HBwcDFY3dV9Pezxs2DBYWlriwoULXY6dP38ednZ2/XKXlwzP3Nwc4eHhyMrK6vQZGNevX0dBQQGeeOKJfqmDz/MaufDwcAQFBSE5ORkqlQpubm5IT0/HmTNnkJKSopsXHx+P/Px8XLlyRTe2ePFifP3113jppZeQmJgImUyG1NRUyGQyJCQkiHE5dB+96fOKFSvw/fffIzExEdbW1igoKNAdc3BwwKhRo/r1Wuj+etpnFxcXuLi46H1MZ2dnBAUF9Uv91D29/Xc7IyMDS5YsQVJSEmxsbLB//35cunQJmzdvFuNySI+e9tjCwgLz58/Hrl27kJycjOjoaGi1Wt3a1157TaxLot9x6NAhAMBPP/0EAPjhhx9QW1uLIUOGIDw8HAAwc+ZMAOj0y9+KFSvw5z//GcuWLcOLL74IjUaDzZs3w9XVFQsWLOiX2iVCx/OBZLTq6+uxadMmZGVlQa1WQy6XIzExEZGRkbo5+n5gAEBpaSk2bNiAvLw8CIKAwMBArF69mm+7aIR62ueOt3PTJyYmBuvXrzdo3fRgevP9/Fve3t5YuHAhkpOTDV02PaDe9LmyshLvvvsucnNz0dTUBC8vLyxbtqzTWhJfT3vc3t6OL7/8Env37kV5eTmkUik8PT0RGxuLp59+mtuzjND9fs66urrqgr2+oA8AFy5cwMaNG3HhwgXIZDJMnToVa9as6bd3T2PQJyIiIiIyQdyjT0RERERkghj0iYiIiIhMEIM+EREREZEJYtAnIiIiIjJBDPpERERERCaIQZ+IiIiIyAQx6BMRkUmJj4/Xvac1EdFgxk/GJSKiP5SXl4eFCxfe97iZmRkKCwv7sSIiIvojDPpERNRtc+fORVhYWJdxqZRPEBMRGRsGfSIi6rbx48fjmWeeEbsMIiLqBt6CISKiPlNZWQlvb29s3boVmZmZeOqppzBhwgRMnz4dW7duRVtbW5c1ly9fRmJiIoKCgjBhwgTMnj0bH3/8Mdrb27vMra6uxtq1axEREQE/Pz+EhITgr3/9K06dOtVl7u3bt/H6669j8uTJ8Pf3x+LFi3H9+nWDXDcRkTHiHX0iIuo2jUaDmpqaLuMWFhYYOnSo7s85OTnYtWsXYmNjMXz4cBw7dgzbtm3DzZs38Y9//EM376effkJ8fDxkMplubk5ODjZu3IjLly/jv//7v3VzKysr8Ze//AVKpRLPPPMM/Pz8oNFocP78eSgUCkydOlU3t7GxEXFxcXj00UexcuVKVFZWYvfu3Vi+fDkyMzNhZmZmoK8QEZHxYNAnIqJu27p1K7Zu3dplfPr06dixY4fuzz///DP27dsHX19fAEBcXBySkpLw1VdfYd68efD39wcArFu3Di0tLdizZw98fHx0c1977TVkZmbi+eefR0hICADg7bffxp07d7Bz505Mmzat0/m1Wm2nP9fW1mLx4sVYunSpbszBwQHvvfceFApFl/VERKaIQZ+IiLpt3rx5mDVrVpdxBweHTn8ODQ3VhXwAkEgkWLJkCY4cOYLs7Gz4+/tDqVTi3LlziIqK0oX8jrkJCQk4dOgQsrOzERISApVKhZMnT2LatGl6Q/pvXwwslUq7vEtQcHAwAKCsrIxBn4gGBQZ9IiLqNg8PD4SGhv7hvLFjx3YZk8vlAICKigoAv2zF+fX4b9dLpVLd3PLycgiCgPHjx3erzhEjRsDS0rLTmJ2dHQBApVJ16zGIiAY6vhiXiIj6nEQi+cM5giB0+/E65nbncQH87h78BzkvEdFAxqBPRER97urVq/cdc3d37/R/fXNLSkqg1Wp1czw8PCCRSPihXERED4BBn4iI+pxCocClS5d0fxYEATt37gQAREZGAgAcHR0REBCAnJwcFBUVdZr70UcfAQCioqIA/LLtJiwsDCdOnIBCoehyPt6lJyLqinv0iYio2woLC3HgwAG9xzoCPAD4+Phg0aJFiI2NhZOTE44ePQqFQoFnnnkGAQEBunnJycmIj49HbGwsFixYACcnJ+Tk5OD777/H3Llzde+4AwBvvvkmCgsLsXTpUjz77LPw9fVFc3Mzzp8/D1dXV7zxxhuGu3AiogGIQZ+IiLotMzMTmZmZeo8dPnxYtzd+5syZGD16NHbs2IHr16/D0dERy5cvx/LlyzutmTBhAvbs2YMPPvgA//rXv9DY2Ah3d3esWrUKL774Yqe57u7u2L9/Pz788EOcOHECBw4cgK2tLXx8fDBv3jzDXDAR0QAmEfh8JxER9ZHKykpEREQgKSkJr7zyitjlEBENatyjT0RERERkghj0iYiIiIhMEIM+EREREZEJ4h59IiIiIiITxDv6REREREQmiEGfiIiIiMgEMegTEREREZkgBn0iIiIiIhPEoE9EREREZIIY9ImIiIiITND/B8g5EK+6o9xKAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 864x432 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "import seaborn as sns\n",
    "\n",
    "# Use plot styling from seaborn.\n",
    "sns.set(style='darkgrid')\n",
    "\n",
    "# Increase the plot size and font size.\n",
    "sns.set(font_scale=1.5)\n",
    "plt.rcParams[\"figure.figsize\"] = (12,6)\n",
    "\n",
    "# Plot the learning curve.\n",
    "plt.plot(loss_values, 'b-o')\n",
    "\n",
    "# Label the plot.\n",
    "plt.title(\"Training loss\")\n",
    "plt.xlabel(\"Epoch\")\n",
    "plt.ylabel(\"Loss\")\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = pd.read_csv(\"./data/blog_sample/blog_sample_data.tsv\",delimiter=\"\\t\")\n",
    "check = pd.read_excel(\"./doc-check.xlsx\")\n",
    "check = check[check[\"Daiva. Likely Propaganda? (0=N, 1=Y)\"] == check[\"Scott Likely Propaganda? (0=N, 1=Y)\"]]\n",
    "test = test[test[\"story_id\"].isin(check[\"ID\"].unique())]\n",
    "labels = [int(check[check[\"ID\"]==story_id][\"Daiva. Likely Propaganda? (0=N, 1=Y)\"].values[0]) for story_id in test[\"story_id\"].values]\n",
    "test[\"label\"] = labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "text/plain": "HBox(children=(FloatProgress(value=0.0, max=41.0), HTML(value='')))",
      "application/vnd.jupyter.widget-view+json": {
       "version_major": 2,
       "version_minor": 0,
       "model_id": "7bbe0221b348497ca3c6b85f20f1363b"
      }
     },
     "metadata": {}
    },
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "\nlength of test_split_v is: 94\n"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "   story_id  chunk_num                                           raw_text  \\\n",
       "0     96918          0  kiev, ukraine -- in ukraine, just like in some...   \n",
       "1     96918          1  kiev, ukraine -- in ukraine, just like in some...   \n",
       "2    100591          0  munich, germany -- defense leaders from europe...   \n",
       "3    100591          1  munich, germany -- defense leaders from europe...   \n",
       "4    100591          2  munich, germany -- defense leaders from europe...   \n",
       "\n",
       "                                          text_chunk  label  \n",
       "0  kiev, ukraine -- in ukraine, just like in some...      0  \n",
       "1  the whole thing and attempt to prove that its ...      0  \n",
       "2  munich, germany -- defense leaders from europe...      0  \n",
       "3  greatest assets against the russian threat. in...      0  \n",
       "4  strategic foreign policy objective, however.) ...      0  "
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>story_id</th>\n      <th>chunk_num</th>\n      <th>raw_text</th>\n      <th>text_chunk</th>\n      <th>label</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>96918</td>\n      <td>0</td>\n      <td>kiev, ukraine -- in ukraine, just like in some...</td>\n      <td>kiev, ukraine -- in ukraine, just like in some...</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>96918</td>\n      <td>1</td>\n      <td>kiev, ukraine -- in ukraine, just like in some...</td>\n      <td>the whole thing and attempt to prove that its ...</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>100591</td>\n      <td>0</td>\n      <td>munich, germany -- defense leaders from europe...</td>\n      <td>munich, germany -- defense leaders from europe...</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>100591</td>\n      <td>1</td>\n      <td>munich, germany -- defense leaders from europe...</td>\n      <td>greatest assets against the russian threat. in...</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>100591</td>\n      <td>2</td>\n      <td>munich, germany -- defense leaders from europe...</td>\n      <td>strategic foreign policy objective, however.) ...</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 55
    }
   ],
   "source": [
    "# Apply split function on the test data\n",
    "test['text_split'] = test['raw_text'].apply(get_split)\n",
    "test.head()\n",
    "\n",
    "# create a row split version of dataset \n",
    "\n",
    "from tqdm import tqdm_notebook as tqdm\n",
    "tmp = []\n",
    "\n",
    "for i in tqdm(range(len(test))):\n",
    "    for j in range(len(test.iloc[i].text_split)):\n",
    "#         chunk_num = str(test.iloc[i]['story_id']) + '_' + str(j)\n",
    "        chunk_num = j\n",
    "        tmp.append(\n",
    "        {'story_id': test.iloc[i]['story_id'],\n",
    "            'chunk_num': chunk_num,\n",
    "            'raw_text': test.iloc[i]['raw_text'],\n",
    "            'text_chunk': test.iloc[i]['text_split'][j],\n",
    "            'label': test.iloc[i].label}\n",
    "        )\n",
    "\n",
    "test_split_v = pd.DataFrame(tmp) \n",
    "# train.head()\n",
    "print('length of test_split_v is:', len(test_split_v))\n",
    "test_split_v.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "    Read 0 comments.\nDONE.\n\n                  94 test comments.\n                  20 labeled as 1\n                  74 labeled as 0\n"
     ]
    }
   ],
   "source": [
    "# Performance On Test Set\n",
    "# Data preparation\n",
    "from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "# Tokenize all of the sentences and map the tokens to thier word IDs.\n",
    "test_input_ids = []\n",
    "\n",
    "\n",
    "# For every sentence...\n",
    "for text in test_split_v.text_chunk:\n",
    "    \n",
    "    # Report progress. \n",
    "    if ((len(test_input_ids) % 200) == 0):\n",
    "        print('    Read {:,} comments.'.format(len(test_input_ids)))\n",
    "        \n",
    "    # `encode` will:\n",
    "    #   (1) Tokenize the sentence.\n",
    "    #   (2) Prepend the `[CLS]` token to the start.\n",
    "    #   (3) Append the `[SEP]` token to the end.\n",
    "    #   (4) Map tokens to their IDs.\n",
    "    encoded_sent = tokenizer.encode(\n",
    "                        text,                      # Sentence to encode.\n",
    "                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n",
    "                        max_length = MAX_LEN,       # Truncate all sentences \n",
    "                        truncation=True,\n",
    "                   )\n",
    "    # Add the method sentence to the list\n",
    "    test_input_ids.append(encoded_sent)\n",
    "    \n",
    "print('DONE.')\n",
    "print('')\n",
    "print('{:>20,} test comments.'.format(len(test_input_ids)))\n",
    "\n",
    "\n",
    "# Also retrieve the labels as a list:\n",
    "# Get the labels from the Dataframe and convert from booleans to ints. \n",
    "\n",
    "test_labels = test_split_v['label'].to_numpy().astype(int)\n",
    "\n",
    "\n",
    "print('{:>20,} labeled as 1'.format(np.sum(test_labels)))\n",
    "print('{:>20,} labeled as 0'.format(len(test_labels) - np.sum(test_labels)))\n",
    "\n",
    "# Pad our input tokens\n",
    "test_input_ids = pad_sequences(test_input_ids, maxlen=MAX_LEN, \n",
    "                          dtype=\"long\", truncating=\"post\", padding=\"post\")\n",
    "\n",
    "# Create attention masks\n",
    "test_attention_masks = []\n",
    "\n",
    "# Create a mask of 1s for each token followed by 0s for padding\n",
    "for seq in test_input_ids:\n",
    "    seq_mask = [float(i>0) for i in seq]\n",
    "    test_attention_masks.append(seq_mask) \n",
    "\n",
    "test_story_ids =test_split_v['story_id'].to_numpy().astype(int)\n",
    "test_chunk_nums = test_split_v['chunk_num'].to_numpy().astype(int)\n",
    "\n",
    "\n",
    "# Convert to tensors.\n",
    "test_inputs = torch.tensor(test_input_ids)\n",
    "test_masks = torch.tensor(test_attention_masks)\n",
    "test_labels = torch.tensor(test_labels)\n",
    "test_story_ids = torch.tensor(test_story_ids)\n",
    "test_chunk_nums = torch.tensor(test_chunk_nums)\n",
    "\n",
    "# Set the batch size.  \n",
    "batch_size = 64  \n",
    "\n",
    "# Create the DataLoader.\n",
    "test_data = TensorDataset(test_inputs, test_masks, test_labels,test_story_ids,test_chunk_nums)\n",
    "test_sampler = SequentialSampler(test_data)\n",
    "test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "from torch.utils.data import Dataset, DataLoader\n",
    "\n",
    "class PDataset(Dataset):\n",
    "    def __init__(self,df):\n",
    "        self.df = df\n",
    "        self.story_ids = df[\"story_id\"].unique()\n",
    "        self.labels = np.array([df[df[\"story_id\"] == id][\"label\"].values[0] for id in self.story_ids])\n",
    "    def __len__(self):\n",
    "        return len(self.story_ids)\n",
    "    def __getitem__(self,id):\n",
    "        story_id = self.story_ids[id]\n",
    "        label = self.labels[id]\n",
    "        text_chunks = self.df[self.df[\"story_id\"]==story_id][\"text_chunk\"].values\n",
    "\n",
    "        input_ids = []\n",
    "        for text in text_chunks:\n",
    "            encoded_sent = tokenizer.encode(\n",
    "                        text,                      # Sentence to encode.\n",
    "                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n",
    "                        max_length = MAX_LEN,       # Truncate all sentences \n",
    "                        truncation=True,\n",
    "                   )\n",
    "            input_ids.append(encoded_sent)\n",
    "        \n",
    "        input_ids = pad_sequences(input_ids, maxlen=MAX_LEN, \n",
    "                          dtype=\"long\", truncating=\"post\", padding=\"post\")\n",
    "        \n",
    "        return torch.tensor(input_ids), torch.tensor(label), torch.tensor(story_id)\n",
    "        \n",
    "dataset = PDataset(test_split_v)\n",
    "dataloader = DataLoader(dataset, batch_size=1,shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "Sequential(\n",
       "  (0): Dropout(p=0.1, inplace=False)\n",
       "  (1): Linear(in_features=768, out_features=2, bias=True)\n",
       ")"
      ]
     },
     "metadata": {},
     "execution_count": 67
    }
   ],
   "source": [
    "import torch.nn as nn\n",
    "from transformers import BertModel\n",
    "from copy import deepcopy\n",
    "state_dict = torch.load(model_path)\n",
    "\n",
    "representation_model = BertModel.from_pretrained(\"bert-base-uncased\",add_pooling_layer=True)\n",
    "# model_state_dict = representation_model.state_dict()\n",
    "new_state_dict = {}\n",
    "for key in state_dict:\n",
    "    if key == \"classifier.weight\" or key == \"classifier.bias\":\n",
    "        continue\n",
    "    model_key = \".\".join(key.split(\".\")[1:])\n",
    "    new_state_dict[model_key] = state_dict[key]\n",
    "representation_model.load_state_dict(new_state_dict)\n",
    "\n",
    "classifier_model = nn.Sequential(\n",
    "    nn.Dropout(p=0.1),\n",
    "    nn.Linear(768,2,bias=True)\n",
    ")\n",
    "\n",
    "\n",
    "new_state_dict = {}\n",
    "for key in state_dict:\n",
    "    if key == \"classifier.weight\" or key == \"classifier.bias\":\n",
    "        model_key = \"1.\" + key.split(\".\")[1]\n",
    "        new_state_dict[model_key] = state_dict[key]\n",
    "\n",
    "classifier_model.load_state_dict(new_state_dict)\n",
    "\n",
    "representation_model.eval()\n",
    "classifier_model.eval()\n",
    "representation_model.to(device)\n",
    "classifier_model.to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import torch.nn.functional as F\n",
    "test_predictions, test_true_labels,story_ids = [], [], []\n",
    "\n",
    "for step, (input_id, label,story_id) in enumerate(dataloader):\n",
    "    input_id = input_id.view(-1,512).to(device)\n",
    "    story_ids.append(story_id.squeeze().item())\n",
    "    with torch.no_grad():\n",
    "        representation = representation_model(input_id)[1]\n",
    "        representation = torch.mean(representation,0).unsqueeze(0)\n",
    "        classification = F.softmax(classifier_model(representation),1).squeeze()\n",
    "        classification = torch.argmax(classification)\n",
    "        test_predictions.append(int(classification.squeeze()))\n",
    "        test_true_labels.append(int(label.squeeze()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "0 0 156241\n0 0 278848\n0 0 215921\n0 0 92803\n0 0 196448\n0 0 142747\n0 0 298285\n0 0 168009\n0 0 36593\n0 0 153576\n0 0 49258\n1 1 73625\n0 0 2469\n0 0 48186\n1 1 67202\n1 1 78439\n0 0 164532\n0 0 54597\n0 0 54961\n0 0 205496\n1 1 216983\n0 0 201776\n0 0 224413\n0.5609756097560976\n"
     ]
    }
   ],
   "source": [
    "correct = 0\n",
    "for i in range(len(test_predictions)):\n",
    "    if test_predictions[i] == test_true_labels[i]:\n",
    "        correct+=1\n",
    "        print(test_predictions[i],test_true_labels[i], story_ids[i])\n",
    "\n",
    "    # else:\n",
    "        \n",
    "print(correct/float(len(test_predictions)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Predicting labels for 4,626 test sentences...\n",
      "    DONE.\n"
     ]
    }
   ],
   "source": [
    "# Prediction on test set\n",
    "\n",
    "print('Predicting labels for {:,} test sentences...'.format(len(test_inputs)))\n",
    "\n",
    "# Put model in evaluation mode\n",
    "model.eval()\n",
    "\n",
    "\n",
    "# Tracking variables \n",
    "test_predictions, test_true_labels, test_story_ids, test_chunk_nums = [], [], [], []\n",
    "\n",
    "# Measure elapsed time.\n",
    "t0 = time.time()\n",
    "\n",
    "# Predict \n",
    "for (step, batch) in enumerate(test_dataloader):\n",
    "    \n",
    "    # Add batch to GPU\n",
    "    batch = tuple(t.to(device) for t in batch)\n",
    "  \n",
    "    # Progress update every 100 batches.\n",
    "    if step % 100 == 0 and not step == 0: \n",
    "        # calculate elapsed time in minutes.\n",
    "        elapsed = format_time(time.time() - t0)\n",
    "        \n",
    "        # Report progress\n",
    "        print('    Batch {:>5,}  of  {:>5,},      Elapsed: {:}.'.format(step, len(test_labels), elapsed))\n",
    "        \n",
    "        \n",
    "    # Unpack the inputs from our dataloader\n",
    "    b_input_ids, b_input_mask, b_labels, b_story_ids, b_chunk_nums = batch\n",
    "  \n",
    "    # Telling the model not to compute or store gradients, saving memory and \n",
    "    # speeding up prediction\n",
    "    with torch.no_grad():\n",
    "        # Forward pass, calculate logit predictions\n",
    "        outputs = model(b_input_ids, token_type_ids=None, \n",
    "                      attention_mask=b_input_mask)\n",
    "\n",
    "    logits = outputs[0]\n",
    "\n",
    "    # Move logits and labels to CPU\n",
    "    logits = logits.detach().cpu().numpy()\n",
    "    label_ids = b_labels.to('cpu').numpy()\n",
    "    story_ids = b_story_ids.to('cpu').numpy()\n",
    "    chunk_nums = b_chunk_nums.to('cpu').numpy()\n",
    "    # Store predictions and true labels\n",
    "    \n",
    "    test_predictions.append(logits)\n",
    "    test_true_labels.append(label_ids)\n",
    "    test_story_ids.append(story_ids)\n",
    "    test_chunk_nums.append(chunk_nums)\n",
    "\n",
    "print('    DONE.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Combine the results across the batches.\n",
    "test_predictions = np.concatenate(test_predictions, axis=0)\n",
    "test_true_labels = np.concatenate(test_true_labels, axis=0)\n",
    "test_story_ids = np.concatenate(test_story_ids, axis=0)\n",
    "test_chunk_nums = np.concatenate(test_chunk_nums, axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "len(test_story_ids): 4626\nlen(test_chunk_nums): 4626\n"
     ]
    },
    {
     "output_type": "display_data",
     "data": {
      "text/plain": "HBox(children=(FloatProgress(value=0.0, max=4626.0), HTML(value='')))",
      "application/vnd.jupyter.widget-view+json": {
       "version_major": 2,
       "version_minor": 0,
       "model_id": "4a8983150abc4d71ba56dc3ac95c218e"
      }
     },
     "metadata": {}
    },
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "\n"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "          test_predictions  test_true_labels  pred_label  test_story_ids  \\\n",
       "0  [-5.0109434, 5.3030066]                 1           1        35358787   \n",
       "1  [-4.2002482, 4.3643236]                 1           1        35358787   \n",
       "2  [4.3625517, -4.6941648]                 0           0        13312382   \n",
       "3  [5.0458508, -5.4009237]                 0           0        13312382   \n",
       "4   [4.7697735, -5.136959]                 0           0        35312144   \n",
       "\n",
       "   test_chunk_nums  \n",
       "0                0  \n",
       "1                1  \n",
       "2                0  \n",
       "3                1  \n",
       "4                0  "
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>test_predictions</th>\n      <th>test_true_labels</th>\n      <th>pred_label</th>\n      <th>test_story_ids</th>\n      <th>test_chunk_nums</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>[-5.0109434, 5.3030066]</td>\n      <td>1</td>\n      <td>1</td>\n      <td>35358787</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>[-4.2002482, 4.3643236]</td>\n      <td>1</td>\n      <td>1</td>\n      <td>35358787</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>[4.3625517, -4.6941648]</td>\n      <td>0</td>\n      <td>0</td>\n      <td>13312382</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>[5.0458508, -5.4009237]</td>\n      <td>0</td>\n      <td>0</td>\n      <td>13312382</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>[4.7697735, -5.136959]</td>\n      <td>0</td>\n      <td>0</td>\n      <td>35312144</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 30
    }
   ],
   "source": [
    "from tqdm import tqdm_notebook as tqdm\n",
    "print('len(test_story_ids):',len(test_story_ids))\n",
    "print('len(test_chunk_nums):', len(test_chunk_nums))\n",
    "pred_label = np.argmax(test_predictions, axis=1).flatten()\n",
    "test_result = []\n",
    "for i in tqdm(range(len(test_true_labels))):\n",
    "    test_result.append({'test_predictions':test_predictions[i],\n",
    "                        'test_true_labels': test_true_labels[i],\n",
    "                        'pred_label': pred_label[i],\n",
    "                        'test_story_ids': test_story_ids[i],\n",
    "                        'test_chunk_nums': test_chunk_nums[i]}\n",
    "                        )\n",
    "df_test_result = pd.DataFrame(test_result)\n",
    "df_test_result.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "document_level_accuracy=  0.9747378455672069\nnumber of test documents=  2098\n"
     ]
    }
   ],
   "source": [
    "# This part of the code takes care of the document level accuracy and list of misclassified articles at the document level\n",
    "\n",
    "misclass_test_story_ids = df_test_result[df_test_result['test_true_labels']-df_test_result['pred_label'] !=0]['test_story_ids'].values.tolist()\n",
    "misclass_test_story_ids = list(set(misclass_test_story_ids))\n",
    "# print(misclass_test_story_ids)\n",
    "# print(len(misclass_test_story_ids))\n",
    "\n",
    "document_level_accuracy = 1- (len(misclass_test_story_ids)/len(test))\n",
    "print('document_level_accuracy= ',document_level_accuracy)\n",
    "# print('number of misclassified test documents= ', len(df_tmp))\n",
    "print('number of test documents= ',len(test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictions[20:40]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "true_labels[20:40]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9752765126870527\n",
      "0.9752765126870527\n"
     ]
    }
   ],
   "source": [
    "accuracy = flat_accuracy(test_predictions, test_true_labels)\n",
    "print(accuracy)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test ROC AUC: 0.998\n"
     ]
    }
   ],
   "source": [
    "# Our performance metric for the test set.\n",
    "from sklearn.metrics import roc_auc_score\n",
    "\n",
    "# Use the model output for label 1 as our prediction.\n",
    "p1 = predictions[:,1]-predictions[:,0]\n",
    "\n",
    "# Calculate the ROC AUC\n",
    "auc = roc_auc_score(true_labels, p1)\n",
    "\n",
    "print('Test ROC AUC: %.3f' %auc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!ls -l --block-size=K ./model_save/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']\n",
      "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n",
      "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "<All keys matched successfully>"
      ]
     },
     "metadata": {},
     "execution_count": 36
    }
   ],
   "source": [
    "# # Load a trained model and vocabulary that you have fine-tuned\n",
    "\n",
    "model = BertForSequenceClassification.from_pretrained(\n",
    "    \"bert-base-uncased\", # Use the 12-layer BERT model, with an uncased vocab.\n",
    "    num_labels = 2, # The number of output labels--2 for binary classification.\n",
    "                    # You can increase this for multi-class tasks.   \n",
    "    output_attentions = False, # Whether the model returns attentions weights.\n",
    "    output_hidden_states = False, # Whether the model returns all hidden-states.\n",
    ")\n",
    "model.load_state_dict(state_dict)\n",
    "# tokenizer = tokenizer_class.from_pretrained(output_dir)\n",
    "\n",
    "# # Copy the model to the GPU.\n",
    "# model.to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "BertForSequenceClassification(\n",
       "  (bert): BertModel(\n",
       "    (embeddings): BertEmbeddings(\n",
       "      (word_embeddings): Embedding(30522, 768, padding_idx=0)\n",
       "      (position_embeddings): Embedding(512, 768)\n",
       "      (token_type_embeddings): Embedding(2, 768)\n",
       "      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "      (dropout): Dropout(p=0.1, inplace=False)\n",
       "    )\n",
       "    (encoder): BertEncoder(\n",
       "      (layer): ModuleList(\n",
       "        (0): BertLayer(\n",
       "          (attention): BertAttention(\n",
       "            (self): BertSelfAttention(\n",
       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (output): BertSelfOutput(\n",
       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "          )\n",
       "          (intermediate): BertIntermediate(\n",
       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          )\n",
       "          (output): BertOutput(\n",
       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "        (1): BertLayer(\n",
       "          (attention): BertAttention(\n",
       "            (self): BertSelfAttention(\n",
       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (output): BertSelfOutput(\n",
       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "          )\n",
       "          (intermediate): BertIntermediate(\n",
       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          )\n",
       "          (output): BertOutput(\n",
       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "        (2): BertLayer(\n",
       "          (attention): BertAttention(\n",
       "            (self): BertSelfAttention(\n",
       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (output): BertSelfOutput(\n",
       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "          )\n",
       "          (intermediate): BertIntermediate(\n",
       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          )\n",
       "          (output): BertOutput(\n",
       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "        (3): BertLayer(\n",
       "          (attention): BertAttention(\n",
       "            (self): BertSelfAttention(\n",
       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (output): BertSelfOutput(\n",
       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "          )\n",
       "          (intermediate): BertIntermediate(\n",
       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          )\n",
       "          (output): BertOutput(\n",
       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "        (4): BertLayer(\n",
       "          (attention): BertAttention(\n",
       "            (self): BertSelfAttention(\n",
       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (output): BertSelfOutput(\n",
       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "          )\n",
       "          (intermediate): BertIntermediate(\n",
       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          )\n",
       "          (output): BertOutput(\n",
       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "        (5): BertLayer(\n",
       "          (attention): BertAttention(\n",
       "            (self): BertSelfAttention(\n",
       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (output): BertSelfOutput(\n",
       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "          )\n",
       "          (intermediate): BertIntermediate(\n",
       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          )\n",
       "          (output): BertOutput(\n",
       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "        (6): BertLayer(\n",
       "          (attention): BertAttention(\n",
       "            (self): BertSelfAttention(\n",
       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (output): BertSelfOutput(\n",
       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "          )\n",
       "          (intermediate): BertIntermediate(\n",
       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          )\n",
       "          (output): BertOutput(\n",
       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "        (7): BertLayer(\n",
       "          (attention): BertAttention(\n",
       "            (self): BertSelfAttention(\n",
       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (output): BertSelfOutput(\n",
       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "          )\n",
       "          (intermediate): BertIntermediate(\n",
       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          )\n",
       "          (output): BertOutput(\n",
       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "        (8): BertLayer(\n",
       "          (attention): BertAttention(\n",
       "            (self): BertSelfAttention(\n",
       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (output): BertSelfOutput(\n",
       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "          )\n",
       "          (intermediate): BertIntermediate(\n",
       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          )\n",
       "          (output): BertOutput(\n",
       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "        (9): BertLayer(\n",
       "          (attention): BertAttention(\n",
       "            (self): BertSelfAttention(\n",
       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (output): BertSelfOutput(\n",
       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "          )\n",
       "          (intermediate): BertIntermediate(\n",
       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          )\n",
       "          (output): BertOutput(\n",
       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "        (10): BertLayer(\n",
       "          (attention): BertAttention(\n",
       "            (self): BertSelfAttention(\n",
       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (output): BertSelfOutput(\n",
       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "          )\n",
       "          (intermediate): BertIntermediate(\n",
       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          )\n",
       "          (output): BertOutput(\n",
       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "        (11): BertLayer(\n",
       "          (attention): BertAttention(\n",
       "            (self): BertSelfAttention(\n",
       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "            (output): BertSelfOutput(\n",
       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "              (dropout): Dropout(p=0.1, inplace=False)\n",
       "            )\n",
       "          )\n",
       "          (intermediate): BertIntermediate(\n",
       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          )\n",
       "          (output): BertOutput(\n",
       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "    )\n",
       "    (pooler): BertPooler(\n",
       "      (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "      (activation): Tanh()\n",
       "    )\n",
       "  )\n",
       "  (dropout): Dropout(p=0.1, inplace=False)\n",
       "  (classifier): Linear(in_features=768, out_features=2, bias=True)\n",
       ")"
      ]
     },
     "metadata": {},
     "execution_count": 37
    }
   ],
   "source": [
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "blog_data = pd.read_csv('./ukraine_blog_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "                         blogsite_domain  count\n",
       "270      respect-discussion.blogspot.com  35018\n",
       "0                02varvara.wordpress.com  26870\n",
       "379                 www.stateofglobe.com  26618\n",
       "226       nieuwsuitoekraine.blogspot.com  14806\n",
       "23              aneritamtm.wordpress.com  13682\n",
       "..                                   ...    ...\n",
       "47   bristolroversmemorabilia.weebly.com      1\n",
       "25           anthonyramienski.tumblr.com      1\n",
       "245        patrickmurfin.livejournal.com      1\n",
       "247                 pennunion.weebly.com      1\n",
       "51                  butkevich.weebly.com      1\n",
       "\n",
       "[387 rows x 2 columns]"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>blogsite_domain</th>\n      <th>count</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>270</th>\n      <td>respect-discussion.blogspot.com</td>\n      <td>35018</td>\n    </tr>\n    <tr>\n      <th>0</th>\n      <td>02varvara.wordpress.com</td>\n      <td>26870</td>\n    </tr>\n    <tr>\n      <th>379</th>\n      <td>www.stateofglobe.com</td>\n      <td>26618</td>\n    </tr>\n    <tr>\n      <th>226</th>\n      <td>nieuwsuitoekraine.blogspot.com</td>\n      <td>14806</td>\n    </tr>\n    <tr>\n      <th>23</th>\n      <td>aneritamtm.wordpress.com</td>\n      <td>13682</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>47</th>\n      <td>bristolroversmemorabilia.weebly.com</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>25</th>\n      <td>anthonyramienski.tumblr.com</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>245</th>\n      <td>patrickmurfin.livejournal.com</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>247</th>\n      <td>pennunion.weebly.com</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>51</th>\n      <td>butkevich.weebly.com</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n<p>387 rows × 2 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 10
    }
   ],
   "source": [
    "blog_data.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "ukraine_blog_data = pd.read_csv('./data/blog_sample/processed_ukraine_blog_data.csv')\n",
    "bert_results = pd.read_csv('./code/ukraine-blog-results.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "merged_df = ukraine_blog_data.merge(bert_results,on='story_id')[[\"story_id\",\"blogsite_domain\",\"raw_text\",\"bert_classification\"]]\n",
    "group_df = merged_df.groupby(['blogsite_domain','bert_classification']).size().reset_index(name=\"count\")\n",
    "pivot_df = group_df.pivot(index='blogsite_domain',columns='bert_classification',values='count')\n",
    "pivot_df = pivot_df.rename_axis(None,axis=1)\n",
    "pivot_df[0] = pivot_df[0].apply(lambda x: x if pd.notnull(x) else 0)\n",
    "pivot_df[1] = pivot_df[1].apply(lambda x: x if pd.notnull(x) else 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "pivot_df[\"p_perc\"] = round(pivot_df[1]/(pivot_df[0] + pivot_df[1]) * 100, 2)\n",
    "pivot_df[\"np_perc\"] = round(pivot_df[0]/(pivot_df[0] + pivot_df[1]) * 100, 2)\n",
    "pivot_df[\"total\"] = pivot_df[0] + pivot_df[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "pivot_df.sort_values(by=['total'], ascending=False).reset_index()[[\"blogsite_domain\",\"p_perc\",\"np_perc\",\"total\"]].to_csv('blog_domain_p_np.csv',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "merged_df[(merged_df[\"bert_classification\"]==0)&(merged_df[\"blogsite_domain\"]!=\"naturalnews.com\")][[\"blogsite_domain\",\"raw_text\"]].sample(20).to_csv(\"sample_blog_np.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cips",
   "language": "python",
   "name": "cips"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7-final"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}